mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-01-05 19:10:10 +09:00
Merge branch 'ytdl-org:master' into df-sbs-extractor-ovrhaul
This commit is contained in:
commit
3abae00087
468
.github/workflows/ci.yml
vendored
468
.github/workflows/ci.yml
vendored
@ -1,81 +1,479 @@
|
|||||||
name: CI
|
name: CI
|
||||||
on: [push, pull_request]
|
|
||||||
|
env:
|
||||||
|
all-cpython-versions: 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10, 3.11, 3.12
|
||||||
|
main-cpython-versions: 2.7, 3.2, 3.5, 3.9, 3.11
|
||||||
|
pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
|
||||||
|
cpython-versions: main
|
||||||
|
test-set: core
|
||||||
|
# Python beta version to be built using pyenv before setup-python support
|
||||||
|
# Must also be included in all-cpython-versions
|
||||||
|
next: 3.13
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
# push inputs aren't known to GitHub
|
||||||
|
inputs:
|
||||||
|
cpython-versions:
|
||||||
|
type: string
|
||||||
|
default: all
|
||||||
|
test-set:
|
||||||
|
type: string
|
||||||
|
default: core
|
||||||
|
pull_request:
|
||||||
|
# pull_request inputs aren't known to GitHub
|
||||||
|
inputs:
|
||||||
|
cpython-versions:
|
||||||
|
type: string
|
||||||
|
default: main
|
||||||
|
test-set:
|
||||||
|
type: string
|
||||||
|
default: both
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
cpython-versions:
|
||||||
|
type: choice
|
||||||
|
description: CPython versions (main = 2.7, 3.2, 3.5, 3.9, 3.11)
|
||||||
|
options:
|
||||||
|
- all
|
||||||
|
- main
|
||||||
|
required: true
|
||||||
|
default: main
|
||||||
|
test-set:
|
||||||
|
type: choice
|
||||||
|
description: core, download
|
||||||
|
options:
|
||||||
|
- both
|
||||||
|
- core
|
||||||
|
- download
|
||||||
|
required: true
|
||||||
|
default: both
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
|
select:
|
||||||
|
name: Select tests from inputs
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
cpython-versions: ${{ steps.run.outputs.cpython-versions }}
|
||||||
|
test-set: ${{ steps.run.outputs.test-set }}
|
||||||
|
own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
|
||||||
|
steps:
|
||||||
|
# push and pull_request inputs aren't known to GitHub (pt3)
|
||||||
|
- name: Set push defaults
|
||||||
|
if: ${{ github.event_name == 'push' }}
|
||||||
|
env:
|
||||||
|
cpython-versions: all
|
||||||
|
test-set: core
|
||||||
|
run: |
|
||||||
|
echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
|
||||||
|
echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
|
||||||
|
- name: Get pull_request inputs
|
||||||
|
if: ${{ github.event_name == 'pull_request' }}
|
||||||
|
env:
|
||||||
|
cpython-versions: main
|
||||||
|
test-set: both
|
||||||
|
run: |
|
||||||
|
echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
|
||||||
|
echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
|
||||||
|
- name: Make version array
|
||||||
|
id: run
|
||||||
|
run: |
|
||||||
|
# Make a JSON Array from comma/space-separated string (no extra escaping)
|
||||||
|
json_list() { \
|
||||||
|
ret=""; IFS="${IFS},"; set -- $*; \
|
||||||
|
for a in "$@"; do \
|
||||||
|
ret=$(printf '%s"%s"' "${ret}${ret:+, }" "$a"); \
|
||||||
|
done; \
|
||||||
|
printf '[%s]' "$ret"; }
|
||||||
|
tests="${{ inputs.test-set || env.test-set }}"
|
||||||
|
[ $tests = both ] && tests="core download"
|
||||||
|
printf 'test-set=%s\n' "$(json_list $tests)" >> "$GITHUB_OUTPUT"
|
||||||
|
versions="${{ inputs.cpython-versions || env.cpython-versions }}"
|
||||||
|
if [ "$versions" = all ]; then \
|
||||||
|
versions="${{ env.all-cpython-versions }}"; else \
|
||||||
|
versions="${{ env.main-cpython-versions }}"; \
|
||||||
|
fi
|
||||||
|
printf 'cpython-versions=%s\n' \
|
||||||
|
"$(json_list ${versions}${versions:+, }${{ env.pypy-versions }})" >> "$GITHUB_OUTPUT"
|
||||||
|
# versions with a special get-pip.py in a per-version subdirectory
|
||||||
|
printf 'own-pip-versions=%s\n' \
|
||||||
|
"$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
tests:
|
tests:
|
||||||
name: Tests
|
name: Run tests
|
||||||
|
needs: select
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
env:
|
||||||
|
PIP: python -m pip
|
||||||
|
PIP_DISABLE_PIP_VERSION_CHECK: true
|
||||||
|
PIP_NO_PYTHON_VERSION_WARNING: true
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-18.04]
|
os: [ubuntu-20.04]
|
||||||
# TODO: python 2.6
|
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
|
||||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
|
||||||
python-impl: [cpython]
|
python-impl: [cpython]
|
||||||
ytdl-test-set: [core, download]
|
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
|
||||||
run-tests-ext: [sh]
|
run-tests-ext: [sh]
|
||||||
include:
|
include:
|
||||||
# python 3.2 is only available on windows via setup-python
|
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
python-version: 3.2
|
python-version: 3.4
|
||||||
python-impl: cpython
|
python-impl: cpython
|
||||||
ytdl-test-set: core
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
python-version: 3.2
|
python-version: 3.4
|
||||||
python-impl: cpython
|
python-impl: cpython
|
||||||
ytdl-test-set: download
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||||
run-tests-ext: bat
|
run-tests-ext: bat
|
||||||
# jython
|
# jython
|
||||||
- os: ubuntu-18.04
|
- os: ubuntu-20.04
|
||||||
|
python-version: 2.7
|
||||||
python-impl: jython
|
python-impl: jython
|
||||||
ytdl-test-set: core
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||||
run-tests-ext: sh
|
run-tests-ext: sh
|
||||||
- os: ubuntu-18.04
|
- os: ubuntu-20.04
|
||||||
|
python-version: 2.7
|
||||||
python-impl: jython
|
python-impl: jython
|
||||||
ytdl-test-set: download
|
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||||
run-tests-ext: sh
|
run-tests-ext: sh
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Prepare Linux
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
if: ${{ startswith(matrix.os, 'ubuntu') }}
|
||||||
uses: actions/setup-python@v2
|
shell: bash
|
||||||
if: ${{ matrix.python-impl == 'cpython' }}
|
run: |
|
||||||
|
# apt in runner, if needed, may not be up-to-date
|
||||||
|
sudo apt-get update
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v3
|
||||||
|
#-------- Python 3 -----
|
||||||
|
- name: Set up supported Python ${{ matrix.python-version }}
|
||||||
|
id: setup-python
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }}
|
||||||
|
# wrap broken actions/setup-python@v4
|
||||||
|
# NB may run apt-get install in Linux
|
||||||
|
uses: ytdl-org/setup-python@v1
|
||||||
|
env:
|
||||||
|
# Temporary workaround for Python 3.5 failures - May 2024
|
||||||
|
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache-build: true
|
||||||
|
allow-build: info
|
||||||
|
- name: Locate supported Python ${{ matrix.python-version }}
|
||||||
|
if: ${{ env.pythonLocation }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "PYTHONHOME=${pythonLocation}" >> "$GITHUB_ENV"
|
||||||
|
export expected="${{ steps.setup-python.outputs.python-path }}"
|
||||||
|
dirname() { printf '%s\n' \
|
||||||
|
'import os, sys' \
|
||||||
|
'print(os.path.dirname(sys.argv[1]))' \
|
||||||
|
| ${expected} - "$1"; }
|
||||||
|
expd="$(dirname "$expected")"
|
||||||
|
export python="$(command -v python)"
|
||||||
|
[ "$expd" = "$(dirname "$python")" ] || echo "PATH=$expd:${PATH}" >> "$GITHUB_ENV"
|
||||||
|
[ -x "$python" ] || printf '%s\n' \
|
||||||
|
'import os' \
|
||||||
|
'exp = os.environ["expected"]' \
|
||||||
|
'python = os.environ["python"]' \
|
||||||
|
'exps = os.path.split(exp)' \
|
||||||
|
'if python and (os.path.dirname(python) == exp[0]):' \
|
||||||
|
' exit(0)' \
|
||||||
|
'exps[1] = "python" + os.path.splitext(exps[1])[1]' \
|
||||||
|
'python = os.path.join(*exps)' \
|
||||||
|
'try:' \
|
||||||
|
' os.symlink(exp, python)' \
|
||||||
|
'except AttributeError:' \
|
||||||
|
' os.rename(exp, python)' \
|
||||||
|
| ${expected} -
|
||||||
|
printf '%s\n' \
|
||||||
|
'import sys' \
|
||||||
|
'print(sys.path)' \
|
||||||
|
| ${expected} -
|
||||||
|
#-------- Python next (was 3.12) -
|
||||||
|
- name: Set up CPython 3.next environment
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
PYENV_ROOT=$HOME/.local/share/pyenv
|
||||||
|
echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
|
||||||
|
- name: Cache Python 3.next
|
||||||
|
id: cachenext
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
key: python-${{ env.next }}
|
||||||
|
path: |
|
||||||
|
${{ env.PYENV_ROOT }}
|
||||||
|
- name: Build and set up Python 3.next
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! steps.cachenext.outputs.cache-hit }}
|
||||||
|
# dl and build locally
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Install build environment
|
||||||
|
sudo apt-get install -y build-essential llvm libssl-dev tk-dev \
|
||||||
|
libncursesw5-dev libreadline-dev libsqlite3-dev \
|
||||||
|
libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
|
||||||
|
# Download PyEnv from its GitHub repository.
|
||||||
|
export PYENV_ROOT=${{ env.PYENV_ROOT }}
|
||||||
|
export PATH=$PYENV_ROOT/bin:$PATH
|
||||||
|
git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
|
||||||
|
pyenv install ${{ env.next }}
|
||||||
|
- name: Locate Python 3.next
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)"
|
||||||
|
test -n "$PYTHONHOME"
|
||||||
|
echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
|
||||||
|
echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
|
||||||
|
#-------- Python 2.7 --
|
||||||
|
- name: Set up Python 2.7
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.7' }}
|
||||||
|
# install 2.7
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt-get install -y python2 python-is-python2
|
||||||
|
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
|
||||||
|
#-------- Python 2.6 --
|
||||||
|
- name: Set up Python 2.6 environment
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
openssl_name=openssl-1.0.2u
|
||||||
|
echo "openssl_name=${openssl_name}" >> "$GITHUB_ENV"
|
||||||
|
openssl_dir=$HOME/.local/opt/$openssl_name
|
||||||
|
echo "openssl_dir=${openssl_dir}" >> "$GITHUB_ENV"
|
||||||
|
PYENV_ROOT=$HOME/.local/share/pyenv
|
||||||
|
echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
|
||||||
|
sudo apt-get install -y openssl ca-certificates
|
||||||
|
- name: Cache Python 2.6
|
||||||
|
id: cache26
|
||||||
|
if: ${{ matrix.python-version == '2.6' }}
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
key: python-2.6.9
|
||||||
|
path: |
|
||||||
|
${{ env.openssl_dir }}
|
||||||
|
${{ env.PYENV_ROOT }}
|
||||||
|
- name: Build and set up Python 2.6
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' && ! steps.cache26.outputs.cache-hit }}
|
||||||
|
# dl and build locally
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Install build environment
|
||||||
|
sudo apt-get install -y build-essential llvm libssl-dev tk-dev \
|
||||||
|
libncursesw5-dev libreadline-dev libsqlite3-dev \
|
||||||
|
libffi-dev xz-utils zlib1g-dev libbz2-dev liblzma-dev
|
||||||
|
# Download and install OpenSSL 1.0.2, back in time
|
||||||
|
openssl_name=${{ env.openssl_name }}
|
||||||
|
openssl_targz=${openssl_name}.tar.gz
|
||||||
|
openssl_dir=${{ env.openssl_dir }}
|
||||||
|
openssl_inc=$openssl_dir/include
|
||||||
|
openssl_lib=$openssl_dir/lib
|
||||||
|
openssl_ssl=$openssl_dir/ssl
|
||||||
|
curl -L "https://www.openssl.org/source/$openssl_targz" -o $openssl_targz
|
||||||
|
tar -xf $openssl_targz
|
||||||
|
( cd $openssl_name; \
|
||||||
|
./config --prefix=$openssl_dir --openssldir=${openssl_dir}/ssl \
|
||||||
|
--libdir=lib -Wl,-rpath=${openssl_dir}/lib shared zlib-dynamic && \
|
||||||
|
make && \
|
||||||
|
make install )
|
||||||
|
rm -rf $openssl_name
|
||||||
|
rmdir $openssl_ssl/certs && ln -s /etc/ssl/certs $openssl_ssl/certs
|
||||||
|
# Download PyEnv from its GitHub repository.
|
||||||
|
export PYENV_ROOT=${{ env.PYENV_ROOT }}
|
||||||
|
export PATH=$PYENV_ROOT/bin:$PATH
|
||||||
|
git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
|
||||||
|
# Prevent pyenv build trying (and failing) to update pip
|
||||||
|
export GET_PIP=get-pip-2.6.py
|
||||||
|
echo 'import sys; sys.exit(0)' > ${GET_PIP}
|
||||||
|
GET_PIP=$(realpath $GET_PIP)
|
||||||
|
# Build and install Python
|
||||||
|
export CFLAGS="-I$openssl_inc"
|
||||||
|
export LDFLAGS="-L$openssl_lib"
|
||||||
|
export LD_LIBRARY_PATH="$openssl_lib"
|
||||||
|
pyenv install 2.6.9
|
||||||
|
- name: Locate Python 2.6
|
||||||
|
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
PYTHONHOME="${{ env.PYENV_ROOT }}/versions/2.6.9"
|
||||||
|
echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
|
||||||
|
echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
|
||||||
|
echo "LD_LIBRARY_PATH=${{ env.openssl_dir }}/lib${LD_LIBRARY_PATH:+:}${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
|
||||||
|
#-------- Jython ------
|
||||||
- name: Set up Java 8
|
- name: Set up Java 8
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
uses: actions/setup-java@v1
|
uses: actions/setup-java@v3
|
||||||
with:
|
with:
|
||||||
java-version: 8
|
java-version: 8
|
||||||
|
distribution: 'zulu'
|
||||||
|
- name: Setup Jython environment
|
||||||
|
if: ${{ matrix.python-impl == 'jython' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "JYTHON_ROOT=${HOME}/jython" >> "$GITHUB_ENV"
|
||||||
|
echo "PIP=pip" >> "$GITHUB_ENV"
|
||||||
|
- name: Cache Jython
|
||||||
|
id: cachejy
|
||||||
|
if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' }}
|
||||||
|
uses: actions/cache@v3
|
||||||
|
with:
|
||||||
|
# 2.7.3 now available, may solve SNI issue
|
||||||
|
key: jython-2.7.1
|
||||||
|
path: |
|
||||||
|
${{ env.JYTHON_ROOT }}
|
||||||
- name: Install Jython
|
- name: Install Jython
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
if: ${{ matrix.python-impl == 'jython' && matrix.python-version == '2.7' && ! steps.cachejy.outputs.cache-hit }}
|
||||||
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
wget https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
|
||||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
curl -L "https://repo1.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar" -o jython-installer.jar
|
||||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
java -jar jython-installer.jar -s -d "${JYTHON_ROOT}"
|
||||||
- name: Install nose
|
echo "${JYTHON_ROOT}/bin" >> "$GITHUB_PATH"
|
||||||
if: ${{ matrix.python-impl != 'jython' }}
|
- name: Set up cached Jython
|
||||||
run: pip install nose
|
if: ${{ steps.cachejy.outputs.cache-hit }}
|
||||||
- name: Install nose (Jython)
|
shell: bash
|
||||||
if: ${{ matrix.python-impl == 'jython' }}
|
|
||||||
# Working around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
|
||||||
run: |
|
run: |
|
||||||
wget https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl
|
JYTHON_ROOT="${{ env.JYTHON_ROOT }}"
|
||||||
pip install nose-1.3.7-py2-none-any.whl
|
echo "${JYTHON_ROOT}/bin" >> $GITHUB_PATH
|
||||||
|
- name: Install supporting Python 2.7 if possible
|
||||||
|
if: ${{ steps.cachejy.outputs.cache-hit }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
sudo apt-get install -y python2.7 || true
|
||||||
|
#-------- pip ---------
|
||||||
|
- name: Set up supported Python ${{ matrix.python-version }} pip
|
||||||
|
if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || matrix.python-version == '2.7' }}
|
||||||
|
# This step may run in either Linux or Windows
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "$PATH"
|
||||||
|
echo "$PYTHONHOME"
|
||||||
|
# curl is available on both Windows and Linux, -L follows redirects, -O gets name
|
||||||
|
python -m ensurepip || python -m pip --version || { \
|
||||||
|
get_pip="${{ contains(needs.select.outputs.own-pip-versions, matrix.python-version) && format('{0}/', matrix.python-version) || '' }}"; \
|
||||||
|
curl -L -O "https://bootstrap.pypa.io/pip/${get_pip}get-pip.py"; \
|
||||||
|
python get-pip.py; }
|
||||||
|
- name: Set up Python 2.6 pip
|
||||||
|
if: ${{ matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
python -m pip --version || { \
|
||||||
|
curl -L -O "https://bootstrap.pypa.io/pip/2.6/get-pip.py"; \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/ac/95/a05b56bb975efa78d3557efa36acaf9cf5d2fd0ee0062060493687432e03/pip-9.0.3-py2.py3-none-any.whl"; \
|
||||||
|
python get-pip.py --no-setuptools --no-wheel pip-9.0.3-py2.py3-none-any.whl; }
|
||||||
|
# work-around to invoke pip module on 2.6: https://bugs.python.org/issue2751
|
||||||
|
echo "PIP=python -m pip.__main__" >> "$GITHUB_ENV"
|
||||||
|
- name: Set up other Python ${{ matrix.python-version }} pip
|
||||||
|
if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
python -m pip --version || { \
|
||||||
|
curl -L -O "https://bootstrap.pypa.io/pip/3.2/get-pip.py"; \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/b2/d0/cd115fe345dd6f07ec1c780020a7dfe74966fceeb171e0f20d1d4905b0b7/pip-7.1.2-py2.py3-none-any.whl"; \
|
||||||
|
python get-pip.py --no-setuptools --no-wheel pip-7.1.2-py2.py3-none-any.whl; }
|
||||||
|
#-------- unittest ----
|
||||||
|
- name: Upgrade Unittest for Python 2.6
|
||||||
|
if: ${{ matrix.python-version == '2.6' }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||||
|
$PIP -qq show unittest2 || { \
|
||||||
|
for u in "65/26/32b8464df2a97e6dd1b656ed26b2c194606c16fe163c695a992b36c11cdf/six-1.13.0-py2.py3-none-any.whl" \
|
||||||
|
"f2/94/3af39d34be01a24a6e65433d19e107099374224905f1e0cc6bbe1fd22a2f/argparse-1.4.0-py2.py3-none-any.whl" \
|
||||||
|
"c7/a3/c5da2a44c85bfbb6eebcfc1dde24933f8704441b98fdde6528f4831757a6/linecache2-1.0.0-py2.py3-none-any.whl" \
|
||||||
|
"17/0a/6ac05a3723017a967193456a2efa0aa9ac4b51456891af1e2353bb9de21e/traceback2-1.4.0-py2.py3-none-any.whl" \
|
||||||
|
"72/20/7f0f433060a962200b7272b8c12ba90ef5b903e218174301d0abfd523813/unittest2-1.1.0-py2.py3-none-any.whl"; do \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/${u}"; \
|
||||||
|
$PIP install ${u##*/}; \
|
||||||
|
done; }
|
||||||
|
# make tests use unittest2
|
||||||
|
for test in ./test/test_*.py ./test/helper.py; do
|
||||||
|
sed -r -i -e '/^import unittest$/s/test/test2 as unittest/' "$test"
|
||||||
|
done
|
||||||
|
#-------- nose --------
|
||||||
|
- name: Install nose for Python ${{ matrix.python-version }}
|
||||||
|
if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "$PATH"
|
||||||
|
echo "$PYTHONHOME"
|
||||||
|
# Use PyNose for recent Pythons instead of Nose
|
||||||
|
py3ver="${{ matrix.python-version }}"
|
||||||
|
py3ver=${py3ver#3.}
|
||||||
|
[ "$py3ver" != "${{ matrix.python-version }}" ] && py3ver=${py3ver%.*} || py3ver=0
|
||||||
|
[ "$py3ver" -ge 9 ] && nose=pynose || nose=nose
|
||||||
|
$PIP -qq show $nose || $PIP install $nose
|
||||||
|
- name: Install nose for other Python 2
|
||||||
|
if: ${{ matrix.python-impl == 'jython' || (matrix.python-impl == 'cpython' && matrix.python-version == '2.6') }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# Work around deprecation of support for non-SNI clients at PyPI CDN (see https://status.python.org/incidents/hzmjhqsdjqgb)
|
||||||
|
$PIP -qq show nose || { \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/99/4f/13fb671119e65c4dce97c60e67d3fd9e6f7f809f2b307e2611f4701205cb/nose-1.3.7-py2-none-any.whl"; \
|
||||||
|
$PIP install nose-1.3.7-py2-none-any.whl; }
|
||||||
|
- name: Install nose for other Python 3
|
||||||
|
if: ${{ matrix.python-version == '3.2' && steps.setup-python.outputs.python-path }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
$PIP -qq show nose || { \
|
||||||
|
curl -L -O "https://files.pythonhosted.org/packages/15/d8/dd071918c040f50fa1cf80da16423af51ff8ce4a0f2399b7bf8de45ac3d9/nose-1.3.7-py3-none-any.whl"; \
|
||||||
|
$PIP install nose-1.3.7-py3-none-any.whl; }
|
||||||
|
- name: Set up nosetest test
|
||||||
|
if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
# set PYTHON_VER
|
||||||
|
PYTHON_VER=${{ matrix.python-version }}
|
||||||
|
[ "${PYTHON_VER#*-}" != "$PYTHON_VER" ] || PYTHON_VER="${{ matrix.python-impl }}-${PYTHON_VER}"
|
||||||
|
echo "PYTHON_VER=$PYTHON_VER" >> "$GITHUB_ENV"
|
||||||
|
echo "PYTHON_IMPL=${{ matrix.python-impl }}" >> "$GITHUB_ENV"
|
||||||
|
# define a test to validate the Python version used by nosetests
|
||||||
|
printf '%s\n' \
|
||||||
|
'from __future__ import unicode_literals' \
|
||||||
|
'import sys, os, platform' \
|
||||||
|
'try:' \
|
||||||
|
' import unittest2 as unittest' \
|
||||||
|
'except ImportError:' \
|
||||||
|
' import unittest' \
|
||||||
|
'class TestPython(unittest.TestCase):' \
|
||||||
|
' def setUp(self):' \
|
||||||
|
' self.ver = os.environ["PYTHON_VER"].split("-")' \
|
||||||
|
' def test_python_ver(self):' \
|
||||||
|
' self.assertEqual(["%d" % v for v in sys.version_info[:2]], self.ver[-1].split(".")[:2])' \
|
||||||
|
' self.assertTrue(sys.version.startswith(self.ver[-1]))' \
|
||||||
|
' self.assertIn(self.ver[0], ",".join((sys.version, platform.python_implementation())).lower())' \
|
||||||
|
' def test_python_impl(self):' \
|
||||||
|
' self.assertIn(platform.python_implementation().lower(), (os.environ["PYTHON_IMPL"], self.ver[0]))' \
|
||||||
|
> test/test_python.py
|
||||||
|
#-------- TESTS -------
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
if: ${{ contains(needs.select.outputs.test-set, matrix.ytdl-test-set ) }}
|
||||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||||
env:
|
env:
|
||||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
run: |
|
||||||
|
./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||||
flake8:
|
flake8:
|
||||||
name: Linter
|
name: Linter
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: 3.9
|
python-version: 3.9
|
||||||
- name: Install flake8
|
- name: Install flake8
|
||||||
run: pip install flake8
|
run: pip install flake8
|
||||||
- name: Run flake8
|
- name: Run flake8
|
||||||
run: flake8 .
|
run: flake8 .
|
||||||
|
|
||||||
|
95
README.md
95
README.md
@ -1000,6 +1000,8 @@ To run the test, simply invoke your favorite test runner, or execute a test file
|
|||||||
python test/test_download.py
|
python test/test_download.py
|
||||||
nosetests
|
nosetests
|
||||||
|
|
||||||
|
For Python versions 3.6 and later, you can use [pynose](https://pypi.org/project/pynose/) to provide the `nosetests` command. The original [nose](https://pypi.org/project/nose/) has not been updated to support Python 3.10 and later.
|
||||||
|
|
||||||
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||||
|
|
||||||
If you want to create a build of youtube-dl yourself, you'll need
|
If you want to create a build of youtube-dl yourself, you'll need
|
||||||
@ -1091,7 +1093,7 @@ In any case, thank you very much for your contributions!
|
|||||||
|
|
||||||
## youtube-dl coding conventions
|
## youtube-dl coding conventions
|
||||||
|
|
||||||
This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code.
|
This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
|
||||||
|
|
||||||
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all.
|
||||||
|
|
||||||
@ -1331,7 +1333,7 @@ Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`]
|
|||||||
|
|
||||||
Use `url_or_none` for safe URL processing.
|
Use `url_or_none` for safe URL processing.
|
||||||
|
|
||||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
Use `traverse_obj` for safe metadata extraction from parsed JSON.
|
||||||
|
|
||||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||||
|
|
||||||
@ -1340,18 +1342,105 @@ Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/mast
|
|||||||
#### More examples
|
#### More examples
|
||||||
|
|
||||||
##### Safely extract optional description from parsed JSON
|
##### Safely extract optional description from parsed JSON
|
||||||
|
|
||||||
|
When processing complex JSON, as often returned by site API requests or stashed in web pages for "hydration", you can use the `traverse_obj()` utility function to handle multiple fallback values and to ensure the expected type of metadata items. The function's docstring defines how the function works; also review its usage in the codebase for more examples.
|
||||||
|
|
||||||
|
In this example, a text `description`, or `None`, is pulled from the `.result.video[0].summary` member of the parsed JSON `response`, if available.
|
||||||
|
|
||||||
|
```python
|
||||||
|
description = traverse_obj(response, ('result', 'video', 0, 'summary', T(compat_str)))
|
||||||
|
```
|
||||||
|
`T(...)` is a shorthand for a set literal, needed because Python 2.6 (which youtube-dl still supports) has no set literal syntax; if compatibility with Python 2.6 is not required, `T(type_or_transformation)` could be written as a set literal `{type_or_transformation}`.
|
||||||
|
|
||||||
|
Some extractors use the older and less capable `try_get()` function in the same way.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||||
```
|
```
|
||||||
|
|
||||||
##### Safely extract more optional metadata
|
##### Safely extract more optional metadata
|
||||||
|
|
||||||
|
In this example, various optional metadata values are extracted from the `.result.video[0]` member of the parsed JSON `response`, which is expected to be a JS object, parsed into a `dict`, with no crash if that isn't so, or if any of the target values are missing or invalid.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
video = traverse_obj(response, ('result', 'video', 0, T(dict))) or {}
|
||||||
|
# formerly:
|
||||||
|
# video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||||
description = video.get('summary')
|
description = video.get('summary')
|
||||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||||
view_count = int_or_none(video.get('views'))
|
view_count = int_or_none(video.get('views'))
|
||||||
```
|
```
|
||||||
|
|
||||||
|
##### Safely extract nested lists
|
||||||
|
|
||||||
|
Suppose you've extracted JSON like this into a Python data structure named `media_json` using, say, the `_download_json()` or `_parse_json()` methods of `InfoExtractor`:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"title": "Example video",
|
||||||
|
"comment": "try extracting this",
|
||||||
|
"media": [{
|
||||||
|
"type": "bad",
|
||||||
|
"size": 320,
|
||||||
|
"url": "https://some.cdn.site/bad.mp4"
|
||||||
|
}, {
|
||||||
|
"type": "streaming",
|
||||||
|
"url": "https://some.cdn.site/hls.m3u8"
|
||||||
|
}, {
|
||||||
|
"type": "super",
|
||||||
|
"size": 1280,
|
||||||
|
"url": "https://some.cdn.site/good.webm"
|
||||||
|
}],
|
||||||
|
"moreStuff": "more values",
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Then extractor code like this can collect the various fields of the JSON:
|
||||||
|
```python
|
||||||
|
...
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
...
|
||||||
|
...
|
||||||
|
info_dict = {}
|
||||||
|
# extract title and description if valid and not empty
|
||||||
|
info_dict.update(traverse_obj(media_json, {
|
||||||
|
'title': ('title', T(txt_or_none)),
|
||||||
|
'description': ('comment', T(txt_or_none)),
|
||||||
|
}))
|
||||||
|
|
||||||
|
# extract any recognisable media formats
|
||||||
|
fmts = []
|
||||||
|
# traverse into "media" list, extract `dict`s with desired keys
|
||||||
|
for fmt in traverse_obj(media_json, ('media', Ellipsis, {
|
||||||
|
'format_id': ('type', T(txt_or_none)),
|
||||||
|
'url': ('url', T(url_or_none)),
|
||||||
|
'width': ('size', T(int_or_none)), })):
|
||||||
|
# bad `fmt` values were `None` and removed
|
||||||
|
if 'url' not in fmt:
|
||||||
|
continue
|
||||||
|
fmt_url = fmt['url'] # known to be valid URL
|
||||||
|
ext = determine_ext(fmt_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
fmts.extend(self._extract_m3u8_formats(fmt_url, video_id, 'mp4', fatal=False))
|
||||||
|
else:
|
||||||
|
fmt['ext'] = ext
|
||||||
|
fmts.append(fmt)
|
||||||
|
|
||||||
|
# sort, raise if no formats
|
||||||
|
self._sort_formats(fmts)
|
||||||
|
|
||||||
|
info_dict['formats'] = fmts
|
||||||
|
...
|
||||||
|
```
|
||||||
|
If the JSON structure changes so that no formats are found, the extractor raises an exception rather than crashing at random.
|
||||||
|
|
||||||
# EMBEDDING YOUTUBE-DL
|
# EMBEDDING YOUTUBE-DL
|
||||||
|
|
||||||
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
|
youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
|
||||||
|
1
devscripts/__init__.py
Normal file
1
devscripts/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
# Empty file needed to make devscripts.utils properly importable from outside
|
@ -5,8 +5,12 @@ import os
|
|||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
from utils import read_file
|
||||||
|
|
||||||
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
|
||||||
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
|
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
|
||||||
@ -18,9 +22,8 @@ def build_completion(opt_parser):
|
|||||||
for option in group.option_list:
|
for option in group.option_list:
|
||||||
# for every long flag
|
# for every long flag
|
||||||
opts_flag.append(option.get_opt_string())
|
opts_flag.append(option.get_opt_string())
|
||||||
with open(BASH_COMPLETION_TEMPLATE) as f:
|
template = read_file(BASH_COMPLETION_TEMPLATE)
|
||||||
template = f.read()
|
with open(BASH_COMPLETION_FILE, "w", encoding='utf-8') as f:
|
||||||
with open(BASH_COMPLETION_FILE, "w") as f:
|
|
||||||
# just using the special char
|
# just using the special char
|
||||||
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
|
filled_template = template.replace("{{flags}}", " ".join(opts_flag))
|
||||||
f.write(filled_template)
|
f.write(filled_template)
|
||||||
|
@ -49,15 +49,34 @@ def cli_to_api(*opts):
|
|||||||
|
|
||||||
# from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
|
# from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
|
||||||
default = parsed_options([])
|
default = parsed_options([])
|
||||||
diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
|
|
||||||
|
def neq_opt(a, b):
    """Tell whether option value `b` differs meaningfully from default `a`.

    Plain equality settles the common case.  A `DateRange` value (recognised
    by the repr of its type) is treated as unchanged from a `None` default
    when it formats as the unbounded range '0001-01-01 - 9999-12-31'.

    Returns True if the values should be reported as different.
    """
    if a == b:
        return False
    # check the type of the value `b`: the builtin `object` could never match
    if a is None and repr(type(b)).endswith(".utils.DateRange'>"):
        return '0001-01-01 - 9999-12-31' != '{0}'.format(b)
    return a != b
|
||||||
|
|
||||||
|
diff = dict((k, v) for k, v in parsed_options(opts).items() if neq_opt(default[k], v))
|
||||||
if 'postprocessors' in diff:
|
if 'postprocessors' in diff:
|
||||||
diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
|
diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
|
||||||
return diff
|
return diff
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
from pprint import pprint
|
from pprint import PrettyPrinter
|
||||||
pprint(cli_to_api(*sys.argv))
|
|
||||||
|
pprint = PrettyPrinter()
|
||||||
|
super_format = pprint.format
|
||||||
|
|
||||||
|
def format(object, context, maxlevels, level):
|
||||||
|
if repr(type(object)).endswith(".utils.DateRange'>"):
|
||||||
|
return '{0}: {1}>'.format(repr(object)[:-2], object), True, False
|
||||||
|
return super_format(object, context, maxlevels, level)
|
||||||
|
|
||||||
|
pprint.format = format
|
||||||
|
|
||||||
|
pprint.pprint(cli_to_api(*sys.argv))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import netrc
|
import netrc
|
||||||
@ -10,7 +9,9 @@ import os
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
@ -22,6 +23,7 @@ from youtube_dl.utils import (
|
|||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
)
|
)
|
||||||
|
from utils import read_file
|
||||||
|
|
||||||
|
|
||||||
class GitHubReleaser(object):
|
class GitHubReleaser(object):
|
||||||
@ -89,8 +91,7 @@ def main():
|
|||||||
|
|
||||||
changelog_file, version, build_path = args
|
changelog_file, version, build_path = args
|
||||||
|
|
||||||
with io.open(changelog_file, encoding='utf-8') as inf:
|
changelog = read_file(changelog_file)
|
||||||
changelog = inf.read()
|
|
||||||
|
|
||||||
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
|
mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog)
|
||||||
body = mobj.group(1) if mobj else ''
|
body = mobj.group(1) if mobj else ''
|
||||||
|
@ -6,10 +6,13 @@ import os
|
|||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
from youtube_dl.utils import shell_quote
|
from youtube_dl.utils import shell_quote
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
FISH_COMPLETION_FILE = 'youtube-dl.fish'
|
||||||
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
|
||||||
|
|
||||||
@ -38,11 +41,9 @@ def build_completion(opt_parser):
|
|||||||
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
|
||||||
commands.append(shell_quote(complete_cmd))
|
commands.append(shell_quote(complete_cmd))
|
||||||
|
|
||||||
with open(FISH_COMPLETION_TEMPLATE) as f:
|
template = read_file(FISH_COMPLETION_TEMPLATE)
|
||||||
template = f.read()
|
|
||||||
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
filled_template = template.replace('{{commands}}', '\n'.join(commands))
|
||||||
with open(FISH_COMPLETION_FILE, 'w') as f:
|
write_file(FISH_COMPLETION_FILE, filled_template)
|
||||||
f.write(filled_template)
|
|
||||||
|
|
||||||
|
|
||||||
parser = youtube_dl.parseOpts()[0]
|
parser = youtube_dl.parseOpts()[0]
|
||||||
|
@ -6,16 +6,21 @@ import sys
|
|||||||
import hashlib
|
import hashlib
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
if len(sys.argv) <= 1:
|
if len(sys.argv) <= 1:
|
||||||
print('Specify the version number as parameter')
|
print('Specify the version number as parameter')
|
||||||
sys.exit()
|
sys.exit()
|
||||||
version = sys.argv[1]
|
version = sys.argv[1]
|
||||||
|
|
||||||
with open('update/LATEST_VERSION', 'w') as f:
|
write_file('update/LATEST_VERSION', version)
|
||||||
f.write(version)
|
|
||||||
|
|
||||||
versions_info = json.load(open('update/versions.json'))
|
versions_info = json.loads(read_file('update/versions.json'))
|
||||||
if 'signature' in versions_info:
|
if 'signature' in versions_info:
|
||||||
del versions_info['signature']
|
del versions_info['signature']
|
||||||
|
|
||||||
@ -39,5 +44,5 @@ for key, filename in filenames.items():
|
|||||||
versions_info['versions'][version] = new_version
|
versions_info['versions'][version] = new_version
|
||||||
versions_info['latest'] = version
|
versions_info['latest'] = version
|
||||||
|
|
||||||
with open('update/versions.json', 'w') as jsonf:
|
with open('update/versions.json', 'w', encoding='utf-8') as jsonf:
|
||||||
json.dump(versions_info, jsonf, indent=4, sort_keys=True)
|
json.dump(versions_info, jsonf, indent=4, sort_keys=True)  # dump() writes to the file; dumps() only returns a string
|
||||||
|
@ -2,14 +2,21 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
versions_info = json.load(open('update/versions.json'))
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
|
versions_info = json.loads(read_file('update/versions.json'))
|
||||||
version = versions_info['latest']
|
version = versions_info['latest']
|
||||||
version_dict = versions_info['versions'][version]
|
version_dict = versions_info['versions'][version]
|
||||||
|
|
||||||
# Read template page
|
# Read template page
|
||||||
with open('download.html.in', 'r', encoding='utf-8') as tmplf:
|
template = read_file('download.html.in')
|
||||||
template = tmplf.read()
|
|
||||||
|
|
||||||
template = template.replace('@PROGRAM_VERSION@', version)
|
template = template.replace('@PROGRAM_VERSION@', version)
|
||||||
template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
|
template = template.replace('@PROGRAM_URL@', version_dict['bin'][0])
|
||||||
@ -18,5 +25,5 @@ template = template.replace('@EXE_URL@', version_dict['exe'][0])
|
|||||||
template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
|
template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1])
|
||||||
template = template.replace('@TAR_URL@', version_dict['tar'][0])
|
template = template.replace('@TAR_URL@', version_dict['tar'][0])
|
||||||
template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
|
template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1])
|
||||||
with open('download.html', 'w', encoding='utf-8') as dlf:
|
|
||||||
dlf.write(template)
|
write_file('download.html', template)
|
||||||
|
@ -5,17 +5,22 @@ from __future__ import with_statement, unicode_literals
|
|||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import glob
|
import glob
|
||||||
import io # For Python 2 compatibility
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
year = str(datetime.datetime.now().year)
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
from youtube_dl import compat_str
|
||||||
|
|
||||||
|
year = compat_str(datetime.datetime.now().year)
|
||||||
for fn in glob.glob('*.html*'):
|
for fn in glob.glob('*.html*'):
|
||||||
with io.open(fn, encoding='utf-8') as f:
|
content = read_file(fn)
|
||||||
content = f.read()
|
|
||||||
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
||||||
if content != newc:
|
if content != newc:
|
||||||
tmpFn = fn + '.part'
|
tmpFn = fn + '.part'
|
||||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
write_file(tmpFn, newc)
|
||||||
outf.write(newc)
|
|
||||||
os.rename(tmpFn, fn)
|
os.rename(tmpFn, fn)
|
||||||
|
@ -2,10 +2,16 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
|
import os.path
|
||||||
import textwrap
|
import textwrap
|
||||||
|
import sys
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from utils import write_file
|
||||||
|
|
||||||
atom_template = textwrap.dedent("""\
|
atom_template = textwrap.dedent("""\
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
@ -72,5 +78,4 @@ for v in versions:
|
|||||||
entries_str = textwrap.indent(''.join(entries), '\t')
|
entries_str = textwrap.indent(''.join(entries), '\t')
|
||||||
atom_template = atom_template.replace('@ENTRIES@', entries_str)
|
atom_template = atom_template.replace('@ENTRIES@', entries_str)
|
||||||
|
|
||||||
with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file:
|
write_file('update/releases.atom', atom_template)
|
||||||
atom_file.write(atom_template)
|
|
||||||
|
@ -5,15 +5,17 @@ import sys
|
|||||||
import os
|
import os
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
# We must be able to import youtube_dl
|
# We must be able to import youtube_dl
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(dirn(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
|
template = read_file('supportedsites.html.in')
|
||||||
template = tmplf.read()
|
|
||||||
|
|
||||||
ie_htmls = []
|
ie_htmls = []
|
||||||
for ie in youtube_dl.list_extractors(age_limit=None):
|
for ie in youtube_dl.list_extractors(age_limit=None):
|
||||||
@ -29,8 +31,7 @@ def main():
|
|||||||
|
|
||||||
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
|
template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
|
||||||
|
|
||||||
with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
|
write_file('supportedsites.html', template)
|
||||||
sitesf.write(template)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
|
parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
|
||||||
@ -14,8 +15,7 @@ def main():
|
|||||||
|
|
||||||
infile, outfile = args
|
infile, outfile = args
|
||||||
|
|
||||||
with io.open(infile, encoding='utf-8') as inf:
|
readme = read_file(infile)
|
||||||
readme = inf.read()
|
|
||||||
|
|
||||||
bug_text = re.search(
|
bug_text = re.search(
|
||||||
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
|
r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
|
||||||
@ -25,8 +25,7 @@ def main():
|
|||||||
|
|
||||||
out = bug_text + dev_text
|
out = bug_text + dev_text
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
write_file(outfile, out)
|
||||||
outf.write(out)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from utils import read_file, read_version, write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -13,17 +16,11 @@ def main():
|
|||||||
|
|
||||||
infile, outfile = args
|
infile, outfile = args
|
||||||
|
|
||||||
with io.open(infile, encoding='utf-8') as inf:
|
issue_template_tmpl = read_file(infile)
|
||||||
issue_template_tmpl = inf.read()
|
|
||||||
|
|
||||||
# Get the version from youtube_dl/version.py without importing the package
|
out = issue_template_tmpl % {'version': read_version()}
|
||||||
exec(compile(open('youtube_dl/version.py').read(),
|
|
||||||
'youtube_dl/version.py', 'exec'))
|
|
||||||
|
|
||||||
out = issue_template_tmpl % {'version': locals()['__version__']}
|
write_file(outfile, out)
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
|
||||||
outf.write(out)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -1,35 +1,48 @@
|
|||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
from inspect import getsource
|
from inspect import getsource
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
from os.path import dirname as dirn
|
from os.path import dirname as dirn
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
|
print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
|
||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
lazy_extractors_filename = sys.argv[1]
|
lazy_extractors_filename = sys.argv[1]
|
||||||
if os.path.exists(lazy_extractors_filename):
|
if os.path.exists(lazy_extractors_filename):
|
||||||
os.remove(lazy_extractors_filename)
|
os.remove(lazy_extractors_filename)
|
||||||
# Py2: may be confused by leftover lazy_extractors.pyc
|
# Py2: may be confused by leftover lazy_extractors.pyc
|
||||||
try:
|
if sys.version_info[0] < 3:
|
||||||
|
for c in ('c', 'o'):
|
||||||
|
try:
|
||||||
os.remove(lazy_extractors_filename + 'c')
|
os.remove(lazy_extractors_filename + c)  # use the loop suffix so both .pyc and .pyo leftovers are removed
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
from devscripts.utils import read_file, write_file
|
||||||
|
from youtube_dl.compat import compat_register_utf8
|
||||||
|
|
||||||
|
compat_register_utf8()
|
||||||
|
|
||||||
from youtube_dl.extractor import _ALL_CLASSES
|
from youtube_dl.extractor import _ALL_CLASSES
|
||||||
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
|
|
||||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
module_template = read_file('devscripts/lazy_load_template.py')
|
||||||
module_template = f.read()
|
|
||||||
|
|
||||||
|
def get_source(m):
|
||||||
|
return re.sub(r'(?m)^\s*#.*\n', '', getsource(m))
|
||||||
|
|
||||||
|
|
||||||
module_contents = [
|
module_contents = [
|
||||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
module_template,
|
||||||
|
get_source(InfoExtractor.suitable),
|
||||||
|
get_source(InfoExtractor._match_valid_url) + '\n',
|
||||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||||
# needed for suitable() methods of Youtube extractor (see #28780)
|
# needed for suitable() methods of Youtube extractor (see #28780)
|
||||||
'from youtube_dl.utils import parse_qs\n',
|
'from youtube_dl.utils import parse_qs, variadic\n',
|
||||||
]
|
]
|
||||||
|
|
||||||
ie_template = '''
|
ie_template = '''
|
||||||
@ -62,7 +75,7 @@ def build_lazy_ie(ie, name):
|
|||||||
valid_url=valid_url,
|
valid_url=valid_url,
|
||||||
module=ie.__module__)
|
module=ie.__module__)
|
||||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
||||||
s += '\n' + getsource(ie.suitable)
|
s += '\n' + get_source(ie.suitable)
|
||||||
if hasattr(ie, '_make_valid_url'):
|
if hasattr(ie, '_make_valid_url'):
|
||||||
# search extractors
|
# search extractors
|
||||||
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
s += make_valid_template.format(valid_url=ie._make_valid_url())
|
||||||
@ -102,7 +115,17 @@ for ie in ordered_cls:
|
|||||||
module_contents.append(
|
module_contents.append(
|
||||||
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
||||||
|
|
||||||
module_src = '\n'.join(module_contents) + '\n'
|
module_src = '\n'.join(module_contents)
|
||||||
|
|
||||||
with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
|
write_file(lazy_extractors_filename, module_src + '\n')
|
||||||
f.write(module_src)
|
|
||||||
|
# work around JVM byte code module limit in Jython
|
||||||
|
if sys.platform.startswith('java') and sys.version_info[:2] == (2, 7):
|
||||||
|
import subprocess
|
||||||
|
from youtube_dl.compat import compat_subprocess_get_DEVNULL
|
||||||
|
# if Python 2.7 is available, use it to compile the module for Jython
|
||||||
|
try:
|
||||||
|
# if Python 2.7 is available, use it to compile the module for Jython
|
||||||
|
subprocess.check_call(['python2.7', '-m', 'py_compile', lazy_extractors_filename], stdout=compat_subprocess_get_DEVNULL())
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
@ -1,8 +1,14 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
import os.path
|
||||||
import sys
|
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from utils import read_file
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
README_FILE = 'README.md'
|
README_FILE = 'README.md'
|
||||||
helptext = sys.stdin.read()
|
helptext = sys.stdin.read()
|
||||||
@ -10,8 +16,7 @@ helptext = sys.stdin.read()
|
|||||||
if isinstance(helptext, bytes):
|
if isinstance(helptext, bytes):
|
||||||
helptext = helptext.decode('utf-8')
|
helptext = helptext.decode('utf-8')
|
||||||
|
|
||||||
with io.open(README_FILE, encoding='utf-8') as f:
|
oldreadme = read_file(README_FILE)
|
||||||
oldreadme = f.read()
|
|
||||||
|
|
||||||
header = oldreadme[:oldreadme.index('# OPTIONS')]
|
header = oldreadme[:oldreadme.index('# OPTIONS')]
|
||||||
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
|
footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
|
||||||
@ -20,7 +25,7 @@ options = helptext[helptext.index(' General Options:') + 19:]
|
|||||||
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
|
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
|
||||||
options = '# OPTIONS\n' + options + '\n'
|
options = '# OPTIONS\n' + options + '\n'
|
||||||
|
|
||||||
with io.open(README_FILE, 'w', encoding='utf-8') as f:
|
with open(README_FILE, 'w', encoding='utf-8') as f:
|
||||||
f.write(header)
|
f.write(header)
|
||||||
f.write(options)
|
f.write(options)
|
||||||
f.write(footer)
|
f.write(footer)
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
import os
|
import os.path
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
# Import youtube_dl
|
# Import youtube_dl
|
||||||
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
|
dirn = os.path.dirname
|
||||||
sys.path.insert(0, ROOT_DIR)
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
|
||||||
|
from utils import write_file
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||||
@ -38,8 +40,7 @@ def main():
|
|||||||
' - ' + md + '\n'
|
' - ' + md + '\n'
|
||||||
for md in gen_ies_md(ies))
|
for md in gen_ies_md(ies))
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
write_file(outfile, out)
|
||||||
outf.write(out)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import optparse
|
import optparse
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||||
|
|
||||||
PREFIX = r'''%YOUTUBE-DL(1)
|
PREFIX = r'''%YOUTUBE-DL(1)
|
||||||
|
|
||||||
# NAME
|
# NAME
|
||||||
@ -29,8 +29,7 @@ def main():
|
|||||||
|
|
||||||
outfile, = args
|
outfile, = args
|
||||||
|
|
||||||
with io.open(README_FILE, encoding='utf-8') as f:
|
readme = read_file(README_FILE)
|
||||||
readme = f.read()
|
|
||||||
|
|
||||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||||
@ -38,8 +37,7 @@ def main():
|
|||||||
|
|
||||||
readme = filter_options(readme)
|
readme = filter_options(readme)
|
||||||
|
|
||||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
write_file(outfile, readme)
|
||||||
outf.write(readme)
|
|
||||||
|
|
||||||
|
|
||||||
def filter_options(readme):
|
def filter_options(readme):
|
||||||
|
62
devscripts/utils.py
Normal file
62
devscripts/utils.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import functools
|
||||||
|
import os.path
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_kwargs,
|
||||||
|
compat_open as open,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def read_file(fname):
|
||||||
|
with open(fname, encoding='utf-8') as f:
|
||||||
|
return f.read()
|
||||||
|
|
||||||
|
|
||||||
|
def write_file(fname, content, mode='w'):
|
||||||
|
with open(fname, mode, encoding='utf-8') as f:
|
||||||
|
return f.write(content)
|
||||||
|
|
||||||
|
|
||||||
|
def read_version(fname='youtube_dl/version.py'):
    """Get the version without importing the package

    Executes the source of `fname` and returns the `__version__` value
    that it defines.
    """
    # Execute in an explicit namespace: since Python 3.13 (PEP 667), names
    # bound by exec() inside a function are not visible through a later
    # locals() call, so looking up locals()['__version__'] raises KeyError
    namespace = {}
    exec(compile(read_file(fname), fname, 'exec'), namespace)
    return namespace['__version__']
|
||||||
|
|
||||||
|
|
||||||
|
def get_filename_args(has_infile=False, default_outfile=None):
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
if has_infile:
|
||||||
|
parser.add_argument('infile', help='Input file')
|
||||||
|
kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
|
||||||
|
kwargs['help'] = 'Output file'
|
||||||
|
parser.add_argument('outfile', **compat_kwargs(kwargs))
|
||||||
|
|
||||||
|
opts = parser.parse_args()
|
||||||
|
if has_infile:
|
||||||
|
return opts.infile, opts.outfile
|
||||||
|
return opts.outfile
|
||||||
|
|
||||||
|
|
||||||
|
def compose_functions(*functions):
|
||||||
|
return lambda x: functools.reduce(lambda y, f: f(y), functions, x)
|
||||||
|
|
||||||
|
|
||||||
|
def run_process(*args, **kwargs):
|
||||||
|
kwargs.setdefault('text', True)
|
||||||
|
kwargs.setdefault('check', True)
|
||||||
|
kwargs.setdefault('capture_output', True)
|
||||||
|
if kwargs['text']:
|
||||||
|
kwargs.setdefault('encoding', 'utf-8')
|
||||||
|
kwargs.setdefault('errors', 'replace')
|
||||||
|
kwargs = compat_kwargs(kwargs)
|
||||||
|
return subprocess.run(args, **kwargs)
|
@ -7,6 +7,8 @@ import sys
|
|||||||
|
|
||||||
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from utils import read_file, write_file
|
||||||
|
|
||||||
|
|
||||||
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
ZSH_COMPLETION_FILE = "youtube-dl.zsh"
|
||||||
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
|
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
|
||||||
@ -34,15 +36,13 @@ def build_completion(opt_parser):
|
|||||||
|
|
||||||
flags = [opt.get_opt_string() for opt in opts]
|
flags = [opt.get_opt_string() for opt in opts]
|
||||||
|
|
||||||
with open(ZSH_COMPLETION_TEMPLATE) as f:
|
template = read_file(ZSH_COMPLETION_TEMPLATE)
|
||||||
template = f.read()
|
|
||||||
|
|
||||||
template = template.replace("{{fileopts}}", "|".join(fileopts))
|
template = template.replace("{{fileopts}}", "|".join(fileopts))
|
||||||
template = template.replace("{{diropts}}", "|".join(diropts))
|
template = template.replace("{{diropts}}", "|".join(diropts))
|
||||||
template = template.replace("{{flags}}", " ".join(flags))
|
template = template.replace("{{flags}}", " ".join(flags))
|
||||||
|
|
||||||
with open(ZSH_COMPLETION_FILE, "w") as f:
|
write_file(ZSH_COMPLETION_FILE, template)
|
||||||
f.write(template)
|
|
||||||
|
|
||||||
|
|
||||||
parser = youtube_dl.parseOpts()[0]
|
parser = youtube_dl.parseOpts()[0]
|
||||||
|
@ -1,22 +1,24 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import types
|
|
||||||
import ssl
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
|
import types
|
||||||
|
import unittest
|
||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
|
compat_open as open,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
IDENTITY,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
@ -27,10 +29,10 @@ def get_params(override=None):
|
|||||||
"parameters.json")
|
"parameters.json")
|
||||||
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||||
"local_parameters.json")
|
"local_parameters.json")
|
||||||
with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
with open(PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||||
parameters = json.load(pf)
|
parameters = json.load(pf)
|
||||||
if os.path.exists(LOCAL_PARAMETERS_FILE):
|
if os.path.exists(LOCAL_PARAMETERS_FILE):
|
||||||
with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
|
with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
|
||||||
parameters.update(json.load(pf))
|
parameters.update(json.load(pf))
|
||||||
if override:
|
if override:
|
||||||
parameters.update(override)
|
parameters.update(override)
|
||||||
@ -72,7 +74,8 @@ class FakeYDL(YoutubeDL):
|
|||||||
def to_screen(self, s, skip_eol=None):
|
def to_screen(self, s, skip_eol=None):
|
||||||
print(s)
|
print(s)
|
||||||
|
|
||||||
def trouble(self, s, tb=None):
|
def trouble(self, *args, **kwargs):
|
||||||
|
s = args[0] if len(args) > 0 else kwargs.get('message', 'Missing message')
|
||||||
raise Exception(s)
|
raise Exception(s)
|
||||||
|
|
||||||
def download(self, x):
|
def download(self, x):
|
||||||
@ -139,7 +142,7 @@ def expect_value(self, got, expected, field):
|
|||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
contains_str in got,
|
contains_str in got,
|
||||||
'field %s (value: %r) should contain %r' % (field, got, contains_str))
|
'field %s (value: %r) should contain %r' % (field, got, contains_str))
|
||||||
elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
|
elif isinstance(expected, compat_str) and re.match(r'lambda \w+:', expected):
|
||||||
fn = eval(expected)
|
fn = eval(expected)
|
||||||
suite = expected.split(':', 1)[1].strip()
|
suite = expected.split(':', 1)[1].strip()
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
@ -178,18 +181,18 @@ def expect_value(self, got, expected, field):
|
|||||||
op, _, expected_num = expected.partition(':')
|
op, _, expected_num = expected.partition(':')
|
||||||
expected_num = int(expected_num)
|
expected_num = int(expected_num)
|
||||||
if op == 'mincount':
|
if op == 'mincount':
|
||||||
assert_func = assertGreaterEqual
|
assert_func = self.assertGreaterEqual
|
||||||
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||||
elif op == 'maxcount':
|
elif op == 'maxcount':
|
||||||
assert_func = assertLessEqual
|
assert_func = self.assertLessEqual
|
||||||
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||||
elif op == 'count':
|
elif op == 'count':
|
||||||
assert_func = assertEqual
|
assert_func = self.assertEqual
|
||||||
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||||
else:
|
else:
|
||||||
assert False
|
assert False
|
||||||
assert_func(
|
assert_func(
|
||||||
self, len(got), expected_num,
|
len(got), expected_num,
|
||||||
msg_tmpl % (expected_num, field, len(got)))
|
msg_tmpl % (expected_num, field, len(got)))
|
||||||
return
|
return
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -259,27 +262,6 @@ def assertRegexpMatches(self, text, regexp, msg=None):
|
|||||||
self.assertTrue(m, msg)
|
self.assertTrue(m, msg)
|
||||||
|
|
||||||
|
|
||||||
def assertGreaterEqual(self, got, expected, msg=None):
|
|
||||||
if not (got >= expected):
|
|
||||||
if msg is None:
|
|
||||||
msg = '%r not greater than or equal to %r' % (got, expected)
|
|
||||||
self.assertTrue(got >= expected, msg)
|
|
||||||
|
|
||||||
|
|
||||||
def assertLessEqual(self, got, expected, msg=None):
|
|
||||||
if not (got <= expected):
|
|
||||||
if msg is None:
|
|
||||||
msg = '%r not less than or equal to %r' % (got, expected)
|
|
||||||
self.assertTrue(got <= expected, msg)
|
|
||||||
|
|
||||||
|
|
||||||
def assertEqual(self, got, expected, msg=None):
|
|
||||||
if not (got == expected):
|
|
||||||
if msg is None:
|
|
||||||
msg = '%r not equal to %r' % (got, expected)
|
|
||||||
self.assertTrue(got == expected, msg)
|
|
||||||
|
|
||||||
|
|
||||||
def expect_warnings(ydl, warnings_re):
|
def expect_warnings(ydl, warnings_re):
|
||||||
real_warning = ydl.report_warning
|
real_warning = ydl.report_warning
|
||||||
|
|
||||||
@ -297,3 +279,7 @@ def http_server_port(httpd):
|
|||||||
else:
|
else:
|
||||||
sock = httpd.socket
|
sock = httpd.socket
|
||||||
return sock.getsockname()[1]
|
return sock.getsockname()[1]
|
||||||
|
|
||||||
|
|
||||||
|
def expectedFailureIf(cond):
|
||||||
|
return unittest.expectedFailure if cond else IDENTITY
|
||||||
|
@ -3,19 +3,37 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
|
||||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
|
||||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
|
||||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
|
from test.helper import (
|
||||||
|
expect_dict,
|
||||||
|
expect_value,
|
||||||
|
FakeYDL,
|
||||||
|
http_server_port,
|
||||||
|
)
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
|
compat_http_server,
|
||||||
|
compat_open as open,
|
||||||
|
)
|
||||||
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
|
from youtube_dl.extractor import (
|
||||||
|
get_info_extractor,
|
||||||
|
YoutubeIE,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
encode_data_uri,
|
||||||
|
ExtractorError,
|
||||||
|
RegexNotFoundError,
|
||||||
|
strip_jsonp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
TEAPOT_RESPONSE_STATUS = 418
|
TEAPOT_RESPONSE_STATUS = 418
|
||||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||||
@ -100,6 +118,74 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||||
|
|
||||||
|
def test_search_nextjs_data(self):
|
||||||
|
html = '''
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content=
|
||||||
|
"text/html; charset=utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width">
|
||||||
|
<title>Test _search_nextjs_data()</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="__next">
|
||||||
|
<div style="background-color:#17171E" class="FU" dir="ltr">
|
||||||
|
<div class="sc-93de261d-0 dyzzYE">
|
||||||
|
<div>
|
||||||
|
<header class="HD"></header>
|
||||||
|
<main class="MN">
|
||||||
|
<div style="height:0" class="HT0">
|
||||||
|
<div style="width:NaN%" data-testid=
|
||||||
|
"stream-container" class="WDN"></div>
|
||||||
|
</div>
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
<footer class="sc-6e5faf91-0 dEGaHS"></footer>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
|
{"props":{"pageProps":{"video":{"id":"testid"}}}}
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
'''
|
||||||
|
search = self.ie._search_nextjs_data(html, 'testID')
|
||||||
|
self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
|
||||||
|
search = self.ie._search_nextjs_data(
|
||||||
|
'no next.js data here, move along', 'testID', default={'status': 0})
|
||||||
|
self.assertEqual(search['status'], 0)
|
||||||
|
|
||||||
|
def test_search_nuxt_data(self):
|
||||||
|
html = '''
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content=
|
||||||
|
"text/html; charset=utf-8">
|
||||||
|
<title>Nuxt.js Test Page</title>
|
||||||
|
<meta name="viewport" content=
|
||||||
|
"width=device-width, initial-scale=1">
|
||||||
|
<meta data-hid="robots" name="robots" content="all">
|
||||||
|
</head>
|
||||||
|
<body class="BD">
|
||||||
|
<div id="__layout">
|
||||||
|
<h1 class="H1">Example heading</h1>
|
||||||
|
<div class="IN">
|
||||||
|
<p>Decoy text</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>
|
||||||
|
window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null));
|
||||||
|
</script>
|
||||||
|
<script src="/_nuxt/a12345b.js" defer="defer"></script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
'''
|
||||||
|
search = self.ie._search_nuxt_data(html, 'testID')
|
||||||
|
self.assertEqual(search['track']['id'], 'testid')
|
||||||
|
|
||||||
def test_search_json_ld_realworld(self):
|
def test_search_json_ld_realworld(self):
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/23306
|
# https://github.com/ytdl-org/youtube-dl/issues/23306
|
||||||
expect_dict(
|
expect_dict(
|
||||||
@ -348,6 +434,24 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
}],
|
}],
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# from https://0000.studio/
|
||||||
|
# with type attribute but without extension in URL
|
||||||
|
expect_dict(
|
||||||
|
self,
|
||||||
|
self.ie._parse_html5_media_entries(
|
||||||
|
'https://0000.studio',
|
||||||
|
r'''
|
||||||
|
<video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
|
||||||
|
controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
|
||||||
|
</video>
|
||||||
|
''', None)[0],
|
||||||
|
{
|
||||||
|
'formats': [{
|
||||||
|
'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
|
||||||
|
'ext': 'mp4',
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
|
||||||
def test_extract_jwplayer_data_realworld(self):
|
def test_extract_jwplayer_data_realworld(self):
|
||||||
# from http://www.suffolk.edu/sjc/
|
# from http://www.suffolk.edu/sjc/
|
||||||
expect_dict(
|
expect_dict(
|
||||||
@ -801,7 +905,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
]
|
]
|
||||||
|
|
||||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||||
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_m3u8_formats(
|
formats = self.ie._parse_m3u8_formats(
|
||||||
f.read(), m3u8_url, ext='mp4')
|
f.read(), m3u8_url, ext='mp4')
|
||||||
@ -892,7 +996,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
'tbr': 5997.485,
|
'tbr': 5997.485,
|
||||||
'width': 1920,
|
'width': 1920,
|
||||||
'height': 1080,
|
'height': 1080,
|
||||||
}]
|
}],
|
||||||
|
{},
|
||||||
), (
|
), (
|
||||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||||
'urls_only',
|
'urls_only',
|
||||||
@ -975,7 +1080,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
'tbr': 4400,
|
'tbr': 4400,
|
||||||
'width': 1920,
|
'width': 1920,
|
||||||
'height': 1080,
|
'height': 1080,
|
||||||
}]
|
}],
|
||||||
|
{},
|
||||||
), (
|
), (
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/20346
|
# https://github.com/ytdl-org/youtube-dl/issues/20346
|
||||||
# Media considered unfragmented even though it contains
|
# Media considered unfragmented even though it contains
|
||||||
@ -1021,18 +1127,185 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
'width': 360,
|
'width': 360,
|
||||||
'height': 360,
|
'height': 360,
|
||||||
'fps': 30,
|
'fps': 30,
|
||||||
}]
|
}],
|
||||||
|
{},
|
||||||
|
), (
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/30235
|
||||||
|
# Bento4 generated test mpd
|
||||||
|
# mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles
|
||||||
|
'url_and_range',
|
||||||
|
'http://unknown/manifest.mpd', # mpd_url
|
||||||
|
'http://unknown/', # mpd_base_url
|
||||||
|
[{
|
||||||
|
'manifest_url': 'http://unknown/manifest.mpd',
|
||||||
|
'fragment_base_url': 'http://unknown/',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'format_id': 'audio-und-mp4a.40.2',
|
||||||
|
'format_note': 'DASH audio',
|
||||||
|
'container': 'm4a_dash',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'tbr': 98.808,
|
||||||
|
}, {
|
||||||
|
'manifest_url': 'http://unknown/manifest.mpd',
|
||||||
|
'fragment_base_url': 'http://unknown/',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'video-avc1',
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'avc1.4D401E',
|
||||||
|
'tbr': 699.597,
|
||||||
|
'width': 768,
|
||||||
|
'height': 432
|
||||||
|
}],
|
||||||
|
{},
|
||||||
|
), (
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/27575
|
||||||
|
# GPAC generated test mpd
|
||||||
|
# MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles
|
||||||
|
'range_only',
|
||||||
|
'http://unknown/manifest.mpd', # mpd_url
|
||||||
|
'http://unknown/', # mpd_base_url
|
||||||
|
[{
|
||||||
|
'manifest_url': 'http://unknown/manifest.mpd',
|
||||||
|
'fragment_base_url': 'http://unknown/audio_dashinit.mp4',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'format_id': '2',
|
||||||
|
'format_note': 'DASH audio',
|
||||||
|
'container': 'm4a_dash',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'tbr': 98.096,
|
||||||
|
}, {
|
||||||
|
'manifest_url': 'http://unknown/manifest.mpd',
|
||||||
|
'fragment_base_url': 'http://unknown/video_dashinit.mp4',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': '1',
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'avc1.4D401E',
|
||||||
|
'tbr': 526.987,
|
||||||
|
'width': 768,
|
||||||
|
'height': 432
|
||||||
|
}],
|
||||||
|
{},
|
||||||
|
), (
|
||||||
|
'subtitles',
|
||||||
|
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
|
||||||
|
[{
|
||||||
|
'format_id': 'audio=128001',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'tbr': 128.001,
|
||||||
|
'asr': 48000,
|
||||||
|
'format_note': 'DASH audio',
|
||||||
|
'container': 'm4a_dash',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}, {
|
||||||
|
'format_id': 'video=100000',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'width': 336,
|
||||||
|
'height': 144,
|
||||||
|
'tbr': 100,
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'vcodec': 'avc1.4D401F',
|
||||||
|
'acodec': 'none',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}, {
|
||||||
|
'format_id': 'video=326000',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'width': 562,
|
||||||
|
'height': 240,
|
||||||
|
'tbr': 326,
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'vcodec': 'avc1.4D401F',
|
||||||
|
'acodec': 'none',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}, {
|
||||||
|
'format_id': 'video=698000',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'width': 844,
|
||||||
|
'height': 360,
|
||||||
|
'tbr': 698,
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'vcodec': 'avc1.4D401F',
|
||||||
|
'acodec': 'none',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}, {
|
||||||
|
'format_id': 'video=1493000',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'width': 1126,
|
||||||
|
'height': 480,
|
||||||
|
'tbr': 1493,
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'vcodec': 'avc1.4D401F',
|
||||||
|
'acodec': 'none',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}, {
|
||||||
|
'format_id': 'video=4482000',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'width': 1688,
|
||||||
|
'height': 720,
|
||||||
|
'tbr': 4482,
|
||||||
|
'format_note': 'DASH video',
|
||||||
|
'container': 'mp4_dash',
|
||||||
|
'vcodec': 'avc1.4D401F',
|
||||||
|
'acodec': 'none',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}],
|
||||||
|
{
|
||||||
|
'en': [
|
||||||
|
{
|
||||||
|
'ext': 'mp4',
|
||||||
|
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
|
||||||
|
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
|
for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
|
||||||
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
|
with open('./test/testdata/mpd/%s.mpd' % mpd_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_mpd_formats(
|
formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
|
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
|
||||||
self.ie._sort_formats(formats)
|
self.ie._sort_formats(formats)
|
||||||
expect_value(self, formats, expected_formats, None)
|
expect_value(self, formats, expected_formats, None)
|
||||||
|
expect_value(self, subtitles, expected_subtitles, None)
|
||||||
|
|
||||||
def test_parse_f4m_formats(self):
|
def test_parse_f4m_formats(self):
|
||||||
_TEST_CASES = [
|
_TEST_CASES = [
|
||||||
@ -1053,7 +1326,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
]
|
]
|
||||||
|
|
||||||
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
|
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
|
||||||
with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
|
with open('./test/testdata/f4m/%s.f4m' % f4m_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
formats = self.ie._parse_f4m_formats(
|
formats = self.ie._parse_f4m_formats(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
@ -1101,7 +1374,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
]
|
]
|
||||||
|
|
||||||
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
|
||||||
with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
with open('./test/testdata/xspf/%s.xspf' % xspf_file,
|
||||||
mode='r', encoding='utf-8') as f:
|
mode='r', encoding='utf-8') as f:
|
||||||
entries = self.ie._parse_xspf(
|
entries = self.ie._parse_xspf(
|
||||||
compat_etree_fromstring(f.read().encode('utf-8')),
|
compat_etree_fromstring(f.read().encode('utf-8')),
|
||||||
|
@ -10,14 +10,31 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
import json
|
||||||
|
|
||||||
from test.helper import FakeYDL, assertRegexpMatches
|
from test.helper import (
|
||||||
|
FakeYDL,
|
||||||
|
assertRegexpMatches,
|
||||||
|
try_rm,
|
||||||
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_str, compat_urllib_error
|
from youtube_dl.compat import (
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_cookies_SimpleCookie,
|
||||||
|
compat_kwargs,
|
||||||
|
compat_open as open,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_error,
|
||||||
|
)
|
||||||
|
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.postprocessor.common import PostProcessor
|
from youtube_dl.postprocessor.common import PostProcessor
|
||||||
from youtube_dl.utils import ExtractorError, match_filter_func
|
from youtube_dl.utils import (
|
||||||
|
ExtractorError,
|
||||||
|
match_filter_func,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
|
||||||
TEST_URL = 'http://localhost/sample.mp4'
|
TEST_URL = 'http://localhost/sample.mp4'
|
||||||
|
|
||||||
@ -29,11 +46,14 @@ class YDL(FakeYDL):
|
|||||||
self.msgs = []
|
self.msgs = []
|
||||||
|
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
self.downloaded_info_dicts.append(info_dict)
|
self.downloaded_info_dicts.append(info_dict.copy())
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
self.msgs.append(msg)
|
self.msgs.append(msg)
|
||||||
|
|
||||||
|
def dl(self, *args, **kwargs):
|
||||||
|
assert False, 'Downloader must not be invoked for test_YoutubeDL'
|
||||||
|
|
||||||
|
|
||||||
def _make_result(formats, **kwargs):
|
def _make_result(formats, **kwargs):
|
||||||
res = {
|
res = {
|
||||||
@ -42,8 +62,9 @@ def _make_result(formats, **kwargs):
|
|||||||
'title': 'testttitle',
|
'title': 'testttitle',
|
||||||
'extractor': 'testex',
|
'extractor': 'testex',
|
||||||
'extractor_key': 'TestEx',
|
'extractor_key': 'TestEx',
|
||||||
|
'webpage_url': 'http://example.com/watch?v=shenanigans',
|
||||||
}
|
}
|
||||||
res.update(**kwargs)
|
res.update(**compat_kwargs(kwargs))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
@ -681,12 +702,12 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
|
|
||||||
class SimplePP(PostProcessor):
|
class SimplePP(PostProcessor):
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
with open(audiofile, 'wt') as f:
|
with open(audiofile, 'w') as f:
|
||||||
f.write('EXAMPLE')
|
f.write('EXAMPLE')
|
||||||
return [info['filepath']], info
|
return [info['filepath']], info
|
||||||
|
|
||||||
def run_pp(params, PP):
|
def run_pp(params, PP):
|
||||||
with open(filename, 'wt') as f:
|
with open(filename, 'w') as f:
|
||||||
f.write('EXAMPLE')
|
f.write('EXAMPLE')
|
||||||
ydl = YoutubeDL(params)
|
ydl = YoutubeDL(params)
|
||||||
ydl.add_post_processor(PP())
|
ydl.add_post_processor(PP())
|
||||||
@ -705,7 +726,7 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
|
|
||||||
class ModifierPP(PostProcessor):
|
class ModifierPP(PostProcessor):
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
with open(info['filepath'], 'wt') as f:
|
with open(info['filepath'], 'w') as f:
|
||||||
f.write('MODIFIED')
|
f.write('MODIFIED')
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
@ -930,17 +951,11 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||||
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||||
|
|
||||||
class _YDL(YDL):
|
ydl = YDL({
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
super(_YDL, self).__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
def trouble(self, s, tb=None):
|
|
||||||
pass
|
|
||||||
|
|
||||||
ydl = _YDL({
|
|
||||||
'format': 'extra',
|
'format': 'extra',
|
||||||
'ignoreerrors': True,
|
'ignoreerrors': True,
|
||||||
})
|
})
|
||||||
|
ydl.trouble = lambda *_, **__: None
|
||||||
|
|
||||||
class VideoIE(InfoExtractor):
|
class VideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'video:(?P<id>\d+)'
|
_VALID_URL = r'video:(?P<id>\d+)'
|
||||||
@ -1017,5 +1032,160 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(out_info['release_date'], '20210930')
|
self.assertEqual(out_info['release_date'], '20210930')
|
||||||
|
|
||||||
|
|
||||||
|
class TestYoutubeDLCookies(unittest.TestCase):
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def encode_cookie(cookie):
|
||||||
|
if not isinstance(cookie, dict):
|
||||||
|
cookie = vars(cookie)
|
||||||
|
for name, value in cookie.items():
|
||||||
|
yield name, compat_str(value)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def comparable_cookies(cls, cookies):
|
||||||
|
# Work around cookiejar cookies not being unicode strings
|
||||||
|
return sorted(map(tuple, map(sorted, map(cls.encode_cookie, cookies))))
|
||||||
|
|
||||||
|
def assertSameCookies(self, c1, c2, msg=None):
|
||||||
|
return self.assertEqual(
|
||||||
|
*map(self.comparable_cookies, (c1, c2)),
|
||||||
|
msg=msg)
|
||||||
|
|
||||||
|
def assertSameCookieStrings(self, c1, c2, msg=None):
|
||||||
|
return self.assertSameCookies(
|
||||||
|
*map(lambda c: compat_http_cookies_SimpleCookie(c).values(), (c1, c2)),
|
||||||
|
msg=msg)
|
||||||
|
|
||||||
|
def test_header_cookies(self):
|
||||||
|
|
||||||
|
ydl = FakeYDL()
|
||||||
|
ydl.report_warning = lambda *_, **__: None
|
||||||
|
|
||||||
|
def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
|
||||||
|
return compat_http_cookiejar_Cookie(
|
||||||
|
version or 0, name, value, None, False,
|
||||||
|
domain, bool(domain), bool(domain), path, bool(path),
|
||||||
|
secure, expires, False, None, None, rest={})
|
||||||
|
|
||||||
|
test_url, test_domain = (t % ('yt.dl',) for t in ('https://%s/test', '.%s'))
|
||||||
|
|
||||||
|
def test(encoded_cookies, cookies, headers=False, round_trip=None, error_re=None):
|
||||||
|
def _test():
|
||||||
|
ydl.cookiejar.clear()
|
||||||
|
ydl._load_cookies(encoded_cookies, autoscope=headers)
|
||||||
|
if headers:
|
||||||
|
ydl._apply_header_cookies(test_url)
|
||||||
|
data = {'url': test_url}
|
||||||
|
ydl._calc_headers(data)
|
||||||
|
self.assertSameCookies(
|
||||||
|
cookies, ydl.cookiejar,
|
||||||
|
'Extracted cookiejar.Cookie is not the same')
|
||||||
|
if not headers:
|
||||||
|
self.assertSameCookieStrings(
|
||||||
|
data.get('cookies'), round_trip or encoded_cookies,
|
||||||
|
msg='Cookie is not the same as round trip')
|
||||||
|
ydl.__dict__['_YoutubeDL__header_cookies'] = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
_test()
|
||||||
|
except AssertionError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
if not error_re:
|
||||||
|
raise
|
||||||
|
assertRegexpMatches(self, e.args[0], error_re.join(('.*',) * 2))
|
||||||
|
|
||||||
|
test('test=value; Domain=' + test_domain, [cookie('test', 'value', domain=test_domain)])
|
||||||
|
test('test=value', [cookie('test', 'value')], error_re='Unscoped cookies are not allowed')
|
||||||
|
test('cookie1=value1; Domain={0}; Path=/test; cookie2=value2; Domain={0}; Path=/'.format(test_domain), [
|
||||||
|
cookie('cookie1', 'value1', domain=test_domain, path='/test'),
|
||||||
|
cookie('cookie2', 'value2', domain=test_domain, path='/')])
|
||||||
|
cookie_kw = compat_kwargs(
|
||||||
|
{'domain': test_domain, 'path': '/test', 'secure': True, 'expires': '9999999999', })
|
||||||
|
test('test=value; Domain={domain}; Path={path}; Secure; Expires={expires}'.format(**cookie_kw), [
|
||||||
|
cookie('test', 'value', **cookie_kw)])
|
||||||
|
test('test="value; "; path=/test; domain=' + test_domain, [
|
||||||
|
cookie('test', 'value; ', domain=test_domain, path='/test')],
|
||||||
|
round_trip='test="value\\073 "; Domain={0}; Path=/test'.format(test_domain))
|
||||||
|
test('name=; Domain=' + test_domain, [cookie('name', '', domain=test_domain)],
|
||||||
|
round_trip='name=""; Domain=' + test_domain)
|
||||||
|
test('test=value', [cookie('test', 'value', domain=test_domain)], headers=True)
|
||||||
|
test('cookie1=value; Domain={0}; cookie2=value'.format(test_domain), [],
|
||||||
|
headers=True, error_re='Invalid syntax')
|
||||||
|
ydl.report_warning = ydl.report_error
|
||||||
|
test('test=value', [], headers=True, error_re='Passing cookies as a header is a potential security risk')
|
||||||
|
|
||||||
|
def test_infojson_cookies(self):
|
||||||
|
TEST_FILE = 'test_infojson_cookies.info.json'
|
||||||
|
TEST_URL = 'https://example.com/example.mp4'
|
||||||
|
COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
|
||||||
|
COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
|
||||||
|
|
||||||
|
ydl = FakeYDL()
|
||||||
|
ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
|
||||||
|
|
||||||
|
def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
|
||||||
|
fmt = {'url': TEST_URL}
|
||||||
|
if fmts_header_cookies:
|
||||||
|
fmt['http_headers'] = COOKIE_HEADER
|
||||||
|
if cookies_field:
|
||||||
|
fmt['cookies'] = COOKIES
|
||||||
|
return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
|
||||||
|
|
||||||
|
def test(initial_info, note):
|
||||||
|
|
||||||
|
def failure_msg(why):
|
||||||
|
return ' when '.join((why, note))
|
||||||
|
|
||||||
|
result = {}
|
||||||
|
result['processed'] = ydl.process_ie_result(initial_info)
|
||||||
|
self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
|
||||||
|
msg=failure_msg('No cookies set in cookiejar after initial process'))
|
||||||
|
ydl.cookiejar.clear()
|
||||||
|
with open(TEST_FILE) as infojson:
|
||||||
|
result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
|
||||||
|
result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
|
||||||
|
self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
|
||||||
|
msg=failure_msg('No cookies set in cookiejar after final process'))
|
||||||
|
ydl.cookiejar.clear()
|
||||||
|
for key in ('processed', 'loaded', 'final'):
|
||||||
|
info = result[key]
|
||||||
|
self.assertIsNone(
|
||||||
|
traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
|
||||||
|
msg=failure_msg('Cookie header not removed in {0} result'.format(key)))
|
||||||
|
self.assertSameCookieStrings(
|
||||||
|
traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
|
||||||
|
msg=failure_msg('No cookies field found in {0} result'.format(key)))
|
||||||
|
|
||||||
|
test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
|
||||||
|
test(make_info(info_header_cookies=True), 'info_dict header cokies')
|
||||||
|
test(make_info(fmts_header_cookies=True), 'format header cookies')
|
||||||
|
test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
|
||||||
|
test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
|
||||||
|
test(make_info(cookies_field=True), 'cookies format field')
|
||||||
|
test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
|
||||||
|
|
||||||
|
try_rm(TEST_FILE)
|
||||||
|
|
||||||
|
def test_add_headers_cookie(self):
|
||||||
|
def check_for_cookie_header(result):
|
||||||
|
return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
|
||||||
|
|
||||||
|
ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
|
||||||
|
ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com
|
||||||
|
|
||||||
|
fmt = {'url': 'https://example.com/video.mp4'}
|
||||||
|
result = ydl.process_ie_result(_make_result([fmt]), download=False)
|
||||||
|
self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
|
||||||
|
self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
|
||||||
|
self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
|
||||||
|
|
||||||
|
fmt = {'url': 'https://wrong.com/video.mp4'}
|
||||||
|
result = ydl.process_ie_result(_make_result([fmt]), download=False)
|
||||||
|
self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
|
||||||
|
self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
|
||||||
|
self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -46,6 +46,20 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||||||
# will be ignored
|
# will be ignored
|
||||||
self.assertFalse(cookiejar._cookies)
|
self.assertFalse(cookiejar._cookies)
|
||||||
|
|
||||||
|
def test_get_cookie_header(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
header = cookiejar.get_cookie_header('https://www.foobar.foobar')
|
||||||
|
self.assertIn('HTTPONLY_COOKIE', header)
|
||||||
|
|
||||||
|
def test_get_cookies_for_url(self):
|
||||||
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
|
||||||
|
self.assertEqual(len(cookies), 2)
|
||||||
|
cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
|
||||||
|
self.assertFalse(cookies)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -23,6 +23,7 @@ from youtube_dl.compat import (
|
|||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
|
compat_urllib_request,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -135,6 +136,19 @@ class TestCompat(unittest.TestCase):
|
|||||||
self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
|
self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
|
||||||
self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
|
self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
|
||||||
|
|
||||||
|
def test_compat_urllib_request_Request(self):
|
||||||
|
self.assertEqual(
|
||||||
|
compat_urllib_request.Request('http://127.0.0.1', method='PUT').get_method(),
|
||||||
|
'PUT')
|
||||||
|
|
||||||
|
class PUTrequest(compat_urllib_request.Request):
|
||||||
|
def get_method(self):
|
||||||
|
return 'PUT'
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
PUTrequest('http://127.0.0.1').get_method(),
|
||||||
|
'PUT')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -9,7 +9,6 @@ import unittest
|
|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
assertGreaterEqual,
|
|
||||||
expect_warnings,
|
expect_warnings,
|
||||||
get_params,
|
get_params,
|
||||||
gettestcases,
|
gettestcases,
|
||||||
@ -20,27 +19,35 @@ from test.helper import (
|
|||||||
|
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
compat_urllib_error,
|
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_open as open,
|
||||||
|
compat_urllib_error,
|
||||||
)
|
)
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
DownloadError,
|
DownloadError,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
|
IDENTITY,
|
||||||
|
preferredencoding,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
)
|
)
|
||||||
from youtube_dl.extractor import get_info_extractor
|
from youtube_dl.extractor import get_info_extractor
|
||||||
|
|
||||||
RETRIES = 3
|
RETRIES = 3
|
||||||
|
|
||||||
|
# Some unittest APIs require actual str
|
||||||
|
if not isinstance('TEST', str):
|
||||||
|
_encode_str = lambda s: s.encode(preferredencoding())
|
||||||
|
else:
|
||||||
|
_encode_str = IDENTITY
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -101,7 +108,7 @@ def generator(test_case, tname):
|
|||||||
|
|
||||||
def print_skipping(reason):
|
def print_skipping(reason):
|
||||||
print('Skipping %s: %s' % (test_case['name'], reason))
|
print('Skipping %s: %s' % (test_case['name'], reason))
|
||||||
self.skipTest(reason)
|
self.skipTest(_encode_str(reason))
|
||||||
|
|
||||||
if not ie.working():
|
if not ie.working():
|
||||||
print_skipping('IE marked as not _WORKING')
|
print_skipping('IE marked as not _WORKING')
|
||||||
@ -122,7 +129,10 @@ def generator(test_case, tname):
|
|||||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||||
if is_playlist and 'playlist' not in test_case:
|
if is_playlist and 'playlist' not in test_case:
|
||||||
params.setdefault('extract_flat', 'in_playlist')
|
params.setdefault('extract_flat', 'in_playlist')
|
||||||
params.setdefault('playlistend', test_case.get('playlist_mincount'))
|
params.setdefault('playlistend',
|
||||||
|
test_case['playlist_maxcount'] + 1
|
||||||
|
if test_case.get('playlist_maxcount')
|
||||||
|
else test_case.get('playlist_mincount'))
|
||||||
params.setdefault('skip_download', True)
|
params.setdefault('skip_download', True)
|
||||||
|
|
||||||
ydl = YoutubeDL(params, auto_init=False)
|
ydl = YoutubeDL(params, auto_init=False)
|
||||||
@ -183,13 +193,19 @@ def generator(test_case, tname):
|
|||||||
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||||
|
|
||||||
if 'playlist_mincount' in test_case:
|
if 'playlist_mincount' in test_case:
|
||||||
assertGreaterEqual(
|
self.assertGreaterEqual(
|
||||||
self,
|
|
||||||
len(res_dict['entries']),
|
len(res_dict['entries']),
|
||||||
test_case['playlist_mincount'],
|
test_case['playlist_mincount'],
|
||||||
'Expected at least %d in playlist %s, but got only %d' % (
|
'Expected at least %d in playlist %s, but got only %d' % (
|
||||||
test_case['playlist_mincount'], test_case['url'],
|
test_case['playlist_mincount'], test_case['url'],
|
||||||
len(res_dict['entries'])))
|
len(res_dict['entries'])))
|
||||||
|
if 'playlist_maxcount' in test_case:
|
||||||
|
self.assertLessEqual(
|
||||||
|
len(res_dict['entries']),
|
||||||
|
test_case['playlist_maxcount'],
|
||||||
|
'Expected at most %d in playlist %s, but got %d' % (
|
||||||
|
test_case['playlist_maxcount'], test_case['url'],
|
||||||
|
len(res_dict['entries'])))
|
||||||
if 'playlist_count' in test_case:
|
if 'playlist_count' in test_case:
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len(res_dict['entries']),
|
len(res_dict['entries']),
|
||||||
@ -231,8 +247,8 @@ def generator(test_case, tname):
|
|||||||
if params.get('test'):
|
if params.get('test'):
|
||||||
expected_minsize = max(expected_minsize, 10000)
|
expected_minsize = max(expected_minsize, 10000)
|
||||||
got_fsize = os.path.getsize(tc_filename)
|
got_fsize = os.path.getsize(tc_filename)
|
||||||
assertGreaterEqual(
|
self.assertGreaterEqual(
|
||||||
self, got_fsize, expected_minsize,
|
got_fsize, expected_minsize,
|
||||||
'Expected %s to be at least %s, but it\'s only %s ' %
|
'Expected %s to be at least %s, but it\'s only %s ' %
|
||||||
(tc_filename, format_bytes(expected_minsize),
|
(tc_filename, format_bytes(expected_minsize),
|
||||||
format_bytes(got_fsize)))
|
format_bytes(got_fsize)))
|
||||||
@ -245,7 +261,7 @@ def generator(test_case, tname):
|
|||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
os.path.exists(info_json_fn),
|
os.path.exists(info_json_fn),
|
||||||
'Missing info file %s' % info_json_fn)
|
'Missing info file %s' % info_json_fn)
|
||||||
with io.open(info_json_fn, encoding='utf-8') as infof:
|
with open(info_json_fn, encoding='utf-8') as infof:
|
||||||
info_dict = json.load(infof)
|
info_dict = json.load(infof)
|
||||||
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
expect_info_dict(self, info_dict, tc.get('info_dict', {}))
|
||||||
finally:
|
finally:
|
||||||
|
@ -12,20 +12,69 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
FakeLogger,
|
FakeLogger,
|
||||||
|
FakeYDL,
|
||||||
http_server_port,
|
http_server_port,
|
||||||
try_rm,
|
try_rm,
|
||||||
)
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server
|
from youtube_dl.compat import (
|
||||||
from youtube_dl.utils import encodeFilename
|
compat_contextlib_suppress,
|
||||||
from youtube_dl.downloader.external import Aria2pFD
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_server,
|
||||||
|
compat_kwargs,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
encodeFilename,
|
||||||
|
join_nonempty,
|
||||||
|
)
|
||||||
|
from youtube_dl.downloader.external import (
|
||||||
|
Aria2cFD,
|
||||||
|
Aria2pFD,
|
||||||
|
AxelFD,
|
||||||
|
CurlFD,
|
||||||
|
FFmpegFD,
|
||||||
|
HttpieFD,
|
||||||
|
WgetFD,
|
||||||
|
)
|
||||||
|
from youtube_dl.postprocessor import (
|
||||||
|
FFmpegPostProcessor,
|
||||||
|
)
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
|
|
||||||
|
|
||||||
TEST_SIZE = 10 * 1024
|
TEST_SIZE = 10 * 1024
|
||||||
|
|
||||||
|
TEST_COOKIE = {
|
||||||
|
'version': 0,
|
||||||
|
'name': 'test',
|
||||||
|
'value': 'ytdlp',
|
||||||
|
'port': None,
|
||||||
|
'port_specified': False,
|
||||||
|
'domain': '.example.com',
|
||||||
|
'domain_specified': True,
|
||||||
|
'domain_initial_dot': False,
|
||||||
|
'path': '/',
|
||||||
|
'path_specified': True,
|
||||||
|
'secure': False,
|
||||||
|
'expires': None,
|
||||||
|
'discard': False,
|
||||||
|
'comment': None,
|
||||||
|
'comment_url': None,
|
||||||
|
'rest': {},
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_COOKIE_VALUE = join_nonempty('name', 'value', delim='=', from_dict=TEST_COOKIE)
|
||||||
|
|
||||||
|
TEST_INFO = {'url': 'http://www.example.com/'}
|
||||||
|
|
||||||
|
|
||||||
|
def cookiejar_Cookie(**cookie_args):
|
||||||
|
return compat_http_cookiejar_Cookie(**compat_kwargs(cookie_args))
|
||||||
|
|
||||||
|
|
||||||
|
def ifExternalFDAvailable(externalFD):
|
||||||
|
return unittest.skipUnless(externalFD.available(),
|
||||||
|
externalFD.get_basename() + ' not found')
|
||||||
|
|
||||||
|
|
||||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
@ -70,7 +119,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
|||||||
assert False, 'unrecognised server path'
|
assert False, 'unrecognised server path'
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found')
|
@ifExternalFDAvailable(Aria2pFD)
|
||||||
class TestAria2pFD(unittest.TestCase):
|
class TestAria2pFD(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
self.httpd = compat_http_server.HTTPServer(
|
||||||
@ -111,5 +160,113 @@ class TestAria2pFD(unittest.TestCase):
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(HttpieFD)
|
||||||
|
class TestHttpieFD(unittest.TestCase):
|
||||||
|
def test_make_cmd(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
downloader = HttpieFD(ydl, {})
|
||||||
|
self.assertEqual(
|
||||||
|
downloader._make_cmd('test', TEST_INFO),
|
||||||
|
['http', '--download', '--output', 'test', 'http://www.example.com/'])
|
||||||
|
|
||||||
|
# Test cookie header is added
|
||||||
|
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
|
||||||
|
self.assertEqual(
|
||||||
|
downloader._make_cmd('test', TEST_INFO),
|
||||||
|
['http', '--download', '--output', 'test',
|
||||||
|
'http://www.example.com/', 'Cookie:' + TEST_COOKIE_VALUE])
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(AxelFD)
|
||||||
|
class TestAxelFD(unittest.TestCase):
|
||||||
|
def test_make_cmd(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
downloader = AxelFD(ydl, {})
|
||||||
|
self.assertEqual(
|
||||||
|
downloader._make_cmd('test', TEST_INFO),
|
||||||
|
['axel', '-o', 'test', '--', 'http://www.example.com/'])
|
||||||
|
|
||||||
|
# Test cookie header is added
|
||||||
|
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
|
||||||
|
self.assertEqual(
|
||||||
|
downloader._make_cmd('test', TEST_INFO),
|
||||||
|
['axel', '-o', 'test', '-H', 'Cookie: ' + TEST_COOKIE_VALUE,
|
||||||
|
'--max-redirect=0', '--', 'http://www.example.com/'])
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(WgetFD)
|
||||||
|
class TestWgetFD(unittest.TestCase):
|
||||||
|
def test_make_cmd(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
downloader = WgetFD(ydl, {})
|
||||||
|
self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
|
||||||
|
# Test cookiejar tempfile arg is added
|
||||||
|
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
|
||||||
|
self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(CurlFD)
|
||||||
|
class TestCurlFD(unittest.TestCase):
|
||||||
|
def test_make_cmd(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
downloader = CurlFD(ydl, {})
|
||||||
|
self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
|
||||||
|
# Test cookie header is added
|
||||||
|
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
|
||||||
|
self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
|
||||||
|
self.assertIn(TEST_COOKIE_VALUE, downloader._make_cmd('test', TEST_INFO))
|
||||||
|
|
||||||
|
|
||||||
|
@ifExternalFDAvailable(Aria2cFD)
|
||||||
|
class TestAria2cFD(unittest.TestCase):
|
||||||
|
def test_make_cmd(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
downloader = Aria2cFD(ydl, {})
|
||||||
|
downloader._make_cmd('test', TEST_INFO)
|
||||||
|
self.assertFalse(hasattr(downloader, '_cookies_tempfile'))
|
||||||
|
|
||||||
|
# Test cookiejar tempfile arg is added
|
||||||
|
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
|
||||||
|
cmd = downloader._make_cmd('test', TEST_INFO)
|
||||||
|
self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
|
||||||
|
|
||||||
|
|
||||||
|
# Handle delegated availability
|
||||||
|
def ifFFmpegFDAvailable(externalFD):
|
||||||
|
# raise SkipTest, or set False!
|
||||||
|
avail = ifExternalFDAvailable(externalFD) and False
|
||||||
|
with compat_contextlib_suppress(Exception):
|
||||||
|
avail = FFmpegPostProcessor(downloader=None).available
|
||||||
|
return unittest.skipUnless(
|
||||||
|
avail, externalFD.get_basename() + ' not found')
|
||||||
|
|
||||||
|
|
||||||
|
@ifFFmpegFDAvailable(FFmpegFD)
|
||||||
|
class TestFFmpegFD(unittest.TestCase):
|
||||||
|
_args = []
|
||||||
|
|
||||||
|
def _test_cmd(self, args):
|
||||||
|
self._args = args
|
||||||
|
|
||||||
|
def test_make_cmd(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
downloader = FFmpegFD(ydl, {})
|
||||||
|
downloader._debug_cmd = self._test_cmd
|
||||||
|
info_dict = TEST_INFO.copy()
|
||||||
|
info_dict['ext'] = 'mp4'
|
||||||
|
|
||||||
|
downloader._call_downloader('test', info_dict)
|
||||||
|
self.assertEqual(self._args, [
|
||||||
|
'ffmpeg', '-y', '-i', 'http://www.example.com/',
|
||||||
|
'-c', 'copy', '-f', 'mp4', 'file:test'])
|
||||||
|
|
||||||
|
# Test cookies arg is added
|
||||||
|
ydl.cookiejar.set_cookie(cookiejar_Cookie(**TEST_COOKIE))
|
||||||
|
downloader._call_downloader('test', info_dict)
|
||||||
|
self.assertEqual(self._args, [
|
||||||
|
'ffmpeg', '-y', '-cookies', TEST_COOKIE_VALUE + '; path=/; domain=.example.com;\r\n',
|
||||||
|
'-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -88,7 +88,7 @@ class TestHttpFD(unittest.TestCase):
|
|||||||
self.assertTrue(downloader.real_download(filename, {
|
self.assertTrue(downloader.real_download(filename, {
|
||||||
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
|
'url': 'http://127.0.0.1:%d/%s' % (self.port, ep),
|
||||||
}))
|
}))
|
||||||
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
|
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
|
||||||
try_rm(encodeFilename(filename))
|
try_rm(encodeFilename(filename))
|
||||||
|
|
||||||
def download_all(self, params):
|
def download_all(self, params):
|
||||||
|
@ -8,47 +8,53 @@ import unittest
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
from youtube_dl.utils import encodeArgument
|
|
||||||
|
|
||||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
sys.path.insert(0, rootDir)
|
||||||
|
|
||||||
try:
|
from youtube_dl.compat import compat_register_utf8, compat_subprocess_get_DEVNULL
|
||||||
_DEV_NULL = subprocess.DEVNULL
|
from youtube_dl.utils import encodeArgument
|
||||||
except AttributeError:
|
|
||||||
_DEV_NULL = open(os.devnull, 'wb')
|
compat_register_utf8()
|
||||||
|
|
||||||
|
|
||||||
|
_DEV_NULL = compat_subprocess_get_DEVNULL()
|
||||||
|
|
||||||
|
|
||||||
class TestExecution(unittest.TestCase):
|
class TestExecution(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
self.module = 'youtube_dl'
|
||||||
|
if sys.version_info < (2, 7):
|
||||||
|
self.module += '.__main__'
|
||||||
|
|
||||||
def test_import(self):
|
def test_import(self):
|
||||||
subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
|
subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
|
||||||
|
|
||||||
def test_module_exec(self):
|
def test_module_exec(self):
|
||||||
if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution
|
subprocess.check_call([sys.executable, '-m', self.module, '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
|
||||||
|
|
||||||
def test_main_exec(self):
|
def test_main_exec(self):
|
||||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, os.path.normpath('youtube_dl/__main__.py'), '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
|
|
||||||
def test_cmdline_umlauts(self):
|
def test_cmdline_umlauts(self):
|
||||||
|
os.environ['PYTHONIOENCODING'] = 'utf-8'
|
||||||
p = subprocess.Popen(
|
p = subprocess.Popen(
|
||||||
[sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
|
[sys.executable, '-m', self.module, encodeArgument('ä'), '--version'],
|
||||||
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||||
_, stderr = p.communicate()
|
_, stderr = p.communicate()
|
||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
def test_lazy_extractors(self):
|
def test_lazy_extractors(self):
|
||||||
lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
|
lazy_extractors = os.path.normpath('youtube_dl/extractor/lazy_extractors.py')
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, os.path.normpath('devscripts/make_lazy_extractors.py'), lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, os.path.normpath('test/test_all_urls.py')], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
finally:
|
finally:
|
||||||
for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
|
for x in ('', 'c') if sys.version_info[0] < 3 else ('',):
|
||||||
try:
|
try:
|
||||||
os.remove(lazy_extractors + x)
|
os.remove(lazy_extractors + x)
|
||||||
except (IOError, OSError):
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,33 +8,163 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import gzip
|
||||||
|
import io
|
||||||
|
import ssl
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
# avoid deprecated alias assertRaisesRegexp
|
||||||
|
if hasattr(unittest.TestCase, 'assertRaisesRegex'):
|
||||||
|
unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
|
||||||
|
|
||||||
|
try:
|
||||||
|
import brotli
|
||||||
|
except ImportError:
|
||||||
|
brotli = None
|
||||||
|
try:
|
||||||
|
from urllib.request import pathname2url
|
||||||
|
except ImportError:
|
||||||
|
from urllib import pathname2url
|
||||||
|
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_server,
|
||||||
|
compat_str as str,
|
||||||
|
compat_urllib_error,
|
||||||
|
compat_urllib_HTTPError,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
sanitized_Request,
|
||||||
|
update_Request,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
from test.helper import (
|
from test.helper import (
|
||||||
|
expectedFailureIf,
|
||||||
|
FakeYDL,
|
||||||
FakeLogger,
|
FakeLogger,
|
||||||
http_server_port,
|
http_server_port,
|
||||||
)
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
|
||||||
import ssl
|
|
||||||
import threading
|
|
||||||
|
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
|
protocol_version = 'HTTP/1.1'
|
||||||
|
|
||||||
|
# work-around old/new -style class inheritance
|
||||||
|
def super(self, meth_name, *args, **kwargs):
|
||||||
|
from types import MethodType
|
||||||
|
try:
|
||||||
|
super()
|
||||||
|
fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)
|
||||||
|
except TypeError:
|
||||||
|
fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)
|
||||||
|
self.super = MethodType(fn, self)
|
||||||
|
return self.super(meth_name, *args, **kwargs)
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def do_GET(self):
|
def _headers(self):
|
||||||
if self.path == '/video.html':
|
payload = str(self.headers).encode('utf-8')
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Type', 'application/json')
|
||||||
|
self.send_header('Content-Length', str(len(payload)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def _redirect(self):
|
||||||
|
self.send_response(int(self.path[len('/redirect_'):]))
|
||||||
|
self.send_header('Location', '/method')
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
|
self.end_headers()
|
||||||
|
|
||||||
|
def _method(self, method, payload=None):
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Length', str(len(payload or '')))
|
||||||
|
self.send_header('Method', method)
|
||||||
|
self.end_headers()
|
||||||
|
if payload:
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def _status(self, status):
|
||||||
|
payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8')
|
||||||
|
self.send_response(int(status))
|
||||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
self.send_header('Content-Length', str(len(payload)))
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def _read_data(self):
|
||||||
|
if 'Content-Length' in self.headers:
|
||||||
|
return self.rfile.read(int(self.headers['Content-Length']))
|
||||||
|
|
||||||
|
def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
|
||||||
|
return '{0}://{1}:{2}/{3}'.format(
|
||||||
|
scheme, host,
|
||||||
|
port if port is not None
|
||||||
|
else http_server_port(self.server), path)
|
||||||
|
|
||||||
|
def do_POST(self):
|
||||||
|
data = self._read_data()
|
||||||
|
if self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('POST', data)
|
||||||
|
elif self.path.startswith('/headers'):
|
||||||
|
self._headers()
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
|
def do_HEAD(self):
|
||||||
|
if self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('HEAD')
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
|
def do_PUT(self):
|
||||||
|
data = self._read_data()
|
||||||
|
if self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('PUT', data)
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
|
||||||
|
def respond(payload=b'<html><video src="/vid.mp4" /></html>',
|
||||||
|
payload_type='text/html; charset=utf-8',
|
||||||
|
payload_encoding=None,
|
||||||
|
resp_code=200):
|
||||||
|
self.send_response(resp_code)
|
||||||
|
self.send_header('Content-Type', payload_type)
|
||||||
|
if payload_encoding:
|
||||||
|
self.send_header('Content-Encoding', payload_encoding)
|
||||||
|
self.send_header('Content-Length', str(len(payload))) # required for persistent connections
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(payload)
|
||||||
|
|
||||||
|
def gzip_compress(p):
|
||||||
|
buf = io.BytesIO()
|
||||||
|
with contextlib.closing(gzip.GzipFile(fileobj=buf, mode='wb')) as f:
|
||||||
|
f.write(p)
|
||||||
|
return buf.getvalue()
|
||||||
|
|
||||||
|
if self.path == '/video.html':
|
||||||
|
respond()
|
||||||
elif self.path == '/vid.mp4':
|
elif self.path == '/vid.mp4':
|
||||||
self.send_response(200)
|
respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
|
||||||
self.send_header('Content-Type', 'video/mp4')
|
|
||||||
self.end_headers()
|
|
||||||
self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]')
|
|
||||||
elif self.path == '/302':
|
elif self.path == '/302':
|
||||||
if sys.version_info[0] == 3:
|
if sys.version_info[0] == 3:
|
||||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
# XXX: Python 3 http server does not allow non-ASCII header values
|
||||||
@ -42,60 +172,336 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
|||||||
self.end_headers()
|
self.end_headers()
|
||||||
return
|
return
|
||||||
|
|
||||||
new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server)
|
new_url = self._test_url('中文.html')
|
||||||
self.send_response(302)
|
self.send_response(302)
|
||||||
self.send_header(b'Location', new_url.encode('utf-8'))
|
self.send_header(b'Location', new_url.encode('utf-8'))
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
elif self.path == '/%E4%B8%AD%E6%96%87.html':
|
||||||
self.send_response(200)
|
respond()
|
||||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
elif self.path == '/%c7%9f':
|
||||||
|
respond()
|
||||||
|
elif self.path == '/redirect_dotsegments':
|
||||||
|
self.send_response(301)
|
||||||
|
# redirect to /headers but with dot segments before
|
||||||
|
self.send_header('Location', '/a/b/./../../headers')
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
|
elif self.path.startswith('/redirect_'):
|
||||||
|
self._redirect()
|
||||||
|
elif self.path.startswith('/method'):
|
||||||
|
self._method('GET')
|
||||||
|
elif self.path.startswith('/headers'):
|
||||||
|
self._headers()
|
||||||
|
elif self.path.startswith('/308-to-headers'):
|
||||||
|
self.send_response(308)
|
||||||
|
self.send_header('Location', '/headers')
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
|
self.end_headers()
|
||||||
|
elif self.path == '/trailing_garbage':
|
||||||
|
payload = b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
compressed = gzip_compress(payload) + b'trailing garbage'
|
||||||
|
respond(compressed, payload_encoding='gzip')
|
||||||
|
elif self.path == '/302-non-ascii-redirect':
|
||||||
|
new_url = self._test_url('中文.html')
|
||||||
|
# actually respond with permanent redirect
|
||||||
|
self.send_response(301)
|
||||||
|
self.send_header('Location', new_url)
|
||||||
|
self.send_header('Content-Length', '0')
|
||||||
|
self.end_headers()
|
||||||
|
elif self.path == '/content-encoding':
|
||||||
|
encodings = self.headers.get('ytdl-encoding', '')
|
||||||
|
payload = b'<html><video src="/vid.mp4" /></html>'
|
||||||
|
for encoding in filter(None, (e.strip() for e in encodings.split(','))):
|
||||||
|
if encoding == 'br' and brotli:
|
||||||
|
payload = brotli.compress(payload)
|
||||||
|
elif encoding == 'gzip':
|
||||||
|
payload = gzip_compress(payload)
|
||||||
|
elif encoding == 'deflate':
|
||||||
|
payload = zlib.compress(payload)
|
||||||
|
elif encoding == 'unsupported':
|
||||||
|
payload = b'raw'
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
assert False
|
self._status(415)
|
||||||
|
return
|
||||||
|
respond(payload, payload_encoding=encodings)
|
||||||
|
|
||||||
|
else:
|
||||||
|
self._status(404)
|
||||||
|
|
||||||
|
def send_header(self, keyword, value):
|
||||||
|
"""
|
||||||
|
Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
|
||||||
|
This is against what is defined in RFC 3986: but we need to test that we support this
|
||||||
|
since some sites incorrectly do this.
|
||||||
|
"""
|
||||||
|
if keyword.lower() == 'connection':
|
||||||
|
return self.super('send_header', keyword, value)
|
||||||
|
|
||||||
|
if not hasattr(self, '_headers_buffer'):
|
||||||
|
self._headers_buffer = []
|
||||||
|
|
||||||
|
self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))
|
||||||
|
|
||||||
|
def end_headers(self):
|
||||||
|
if hasattr(self, '_headers_buffer'):
|
||||||
|
self.wfile.write(b''.join(self._headers_buffer))
|
||||||
|
self._headers_buffer = []
|
||||||
|
self.super('end_headers')
|
||||||
|
|
||||||
|
|
||||||
class TestHTTP(unittest.TestCase):
|
class TestHTTP(unittest.TestCase):
|
||||||
|
# when does it make sense to check the SSL certificate?
|
||||||
|
_check_cert = (
|
||||||
|
sys.version_info >= (3, 2)
|
||||||
|
or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19)))
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
# HTTP server
|
||||||
|
self.http_httpd = compat_http_server.HTTPServer(
|
||||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||||
self.port = http_server_port(self.httpd)
|
self.http_port = http_server_port(self.http_httpd)
|
||||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
|
||||||
self.server_thread.daemon = True
|
self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
|
||||||
self.server_thread.start()
|
self.http_server_thread.daemon = True
|
||||||
|
self.http_server_thread.start()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
except ImportError:
|
||||||
|
try:
|
||||||
|
from socketserver import ThreadingMixIn
|
||||||
|
except ImportError:
|
||||||
|
from SocketServer import ThreadingMixIn
|
||||||
|
|
||||||
|
class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# HTTPS server
|
||||||
|
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||||
|
self.https_httpd = ThreadingHTTPServer(
|
||||||
|
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||||
|
try:
|
||||||
|
sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
|
||||||
|
sslctx.verify_mode = ssl.CERT_NONE
|
||||||
|
sslctx.check_hostname = False
|
||||||
|
sslctx.load_cert_chain(certfn, None)
|
||||||
|
self.https_httpd.socket = sslctx.wrap_socket(
|
||||||
|
self.https_httpd.socket, server_side=True)
|
||||||
|
except AttributeError:
|
||||||
|
self.https_httpd.socket = ssl.wrap_socket(
|
||||||
|
self.https_httpd.socket, certfile=certfn, server_side=True)
|
||||||
|
|
||||||
|
self.https_port = http_server_port(self.https_httpd)
|
||||||
|
self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
|
||||||
|
self.https_server_thread.daemon = True
|
||||||
|
self.https_server_thread.start()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
|
||||||
|
def closer(svr):
|
||||||
|
def _closer():
|
||||||
|
svr.shutdown()
|
||||||
|
svr.server_close()
|
||||||
|
return _closer
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.http_httpd))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.http_server_thread.join(2.0)
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.https_httpd))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.https_server_thread.join(2.0)
|
||||||
|
|
||||||
|
def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
|
||||||
|
return '{0}://{1}:{2}/{3}'.format(
|
||||||
|
scheme, host,
|
||||||
|
port if port is not None
|
||||||
|
else self.https_port if scheme == 'https'
|
||||||
|
else self.http_port, path)
|
||||||
|
|
||||||
|
@unittest.skipUnless(_check_cert, 'No support for certificate check in SSL')
|
||||||
|
def test_nocheckcertificate(self):
|
||||||
|
with FakeYDL({'logger': FakeLogger()}) as ydl:
|
||||||
|
with self.assertRaises(compat_urllib_error.URLError):
|
||||||
|
ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
|
||||||
|
|
||||||
|
with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
|
||||||
|
r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
|
||||||
|
self.assertEqual(r.getcode(), 200)
|
||||||
|
r.close()
|
||||||
|
|
||||||
|
def test_percent_encode(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))
|
||||||
|
self.assertEqual(res.getcode(), 200)
|
||||||
|
res.close()
|
||||||
|
# don't normalize existing percent encodings
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))
|
||||||
|
self.assertEqual(res.getcode(), 200)
|
||||||
|
res.close()
|
||||||
|
|
||||||
def test_unicode_path_redirection(self):
|
def test_unicode_path_redirection(self):
|
||||||
# XXX: Python 3 http server does not allow non-ASCII header values
|
with FakeYDL() as ydl:
|
||||||
if sys.version_info[0] == 3:
|
r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))
|
||||||
return
|
self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))
|
||||||
|
r.close()
|
||||||
|
|
||||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
def test_redirect(self):
|
||||||
r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port)
|
with FakeYDL() as ydl:
|
||||||
self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port)
|
def do_req(redirect_status, method, check_no_content=False):
|
||||||
|
data = b'testdata' if method in ('POST', 'PUT') else None
|
||||||
|
res = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('redirect_{0}'.format(redirect_status)),
|
||||||
|
method=method, data=data))
|
||||||
|
if check_no_content:
|
||||||
|
self.assertNotIn('Content-Type', res.headers)
|
||||||
|
return res.read().decode('utf-8'), res.headers.get('method', '')
|
||||||
|
# A 303 must either use GET or HEAD for subsequent request
|
||||||
|
self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
|
||||||
|
self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
|
||||||
|
|
||||||
|
self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
|
||||||
|
|
||||||
class TestHTTPS(unittest.TestCase):
|
# 301 and 302 turn POST only into a GET, with no Content-Type
|
||||||
def setUp(self):
|
self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))
|
||||||
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))
|
||||||
('127.0.0.1', 0), HTTPTestRequestHandler)
|
self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
|
||||||
self.httpd.socket = ssl.wrap_socket(
|
|
||||||
self.httpd.socket, certfile=certfn, server_side=True)
|
|
||||||
self.port = http_server_port(self.httpd)
|
|
||||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
|
||||||
self.server_thread.daemon = True
|
|
||||||
self.server_thread.start()
|
|
||||||
|
|
||||||
def test_nocheckcertificate(self):
|
self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
|
||||||
if sys.version_info >= (2, 7, 9): # No certificate checking anyways
|
self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
|
||||||
ydl = YoutubeDL({'logger': FakeLogger()})
|
|
||||||
self.assertRaises(
|
|
||||||
Exception,
|
|
||||||
ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port)
|
|
||||||
|
|
||||||
ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True})
|
# 307 and 308 should not change method
|
||||||
r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port)
|
for m in ('POST', 'PUT'):
|
||||||
self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port)
|
self.assertEqual(do_req(307, m), ('testdata', m))
|
||||||
|
self.assertEqual(do_req(308, m), ('testdata', m))
|
||||||
|
|
||||||
|
self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
|
||||||
|
self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
|
||||||
|
|
||||||
|
# These should not redirect and instead raise an HTTPError
|
||||||
|
for code in (300, 304, 305, 306):
|
||||||
|
with self.assertRaises(compat_urllib_HTTPError):
|
||||||
|
do_req(code, 'GET')
|
||||||
|
|
||||||
|
# Jython 2.7.1 times out for some reason
|
||||||
|
@expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2))
|
||||||
|
def test_content_type(self):
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
|
||||||
|
with FakeYDL({'nocheckcertificate': True}) as ydl:
|
||||||
|
# method should be auto-detected as POST
|
||||||
|
r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))
|
||||||
|
|
||||||
|
headers = ydl.urlopen(r).read().decode('utf-8')
|
||||||
|
self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
|
||||||
|
|
||||||
|
# test http
|
||||||
|
r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))
|
||||||
|
headers = ydl.urlopen(r).read().decode('utf-8')
|
||||||
|
self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
|
||||||
|
|
||||||
|
def test_update_req(self):
|
||||||
|
req = sanitized_Request('http://example.com')
|
||||||
|
assert req.data is None
|
||||||
|
assert req.get_method() == 'GET'
|
||||||
|
assert not req.has_header('Content-Type')
|
||||||
|
# Test that zero-byte payloads will be sent
|
||||||
|
req = update_Request(req, data=b'')
|
||||||
|
assert req.data == b''
|
||||||
|
assert req.get_method() == 'POST'
|
||||||
|
# yt-dl expects data to be encoded and Content-Type to be added by sender
|
||||||
|
# assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded'
|
||||||
|
|
||||||
|
def test_cookiejar(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
|
||||||
|
0, 'test', 'ytdl', None, False, '127.0.0.1', True,
|
||||||
|
False, '/headers', True, False, None, False, None, None, {}))
|
||||||
|
data = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('headers'))).read().decode('utf-8')
|
||||||
|
self.assertIn('Cookie: test=ytdl', data)
|
||||||
|
|
||||||
|
def test_passed_cookie_header(self):
|
||||||
|
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
# Specified Cookie header should be used
|
||||||
|
res = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
|
||||||
|
self.assertIn('Cookie: test=test', res)
|
||||||
|
|
||||||
|
# Specified Cookie header should be removed on any redirect
|
||||||
|
res = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
|
||||||
|
self.assertNotIn('Cookie: test=test', res)
|
||||||
|
|
||||||
|
# Specified Cookie header should override global cookiejar for that request
|
||||||
|
ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
|
||||||
|
0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
|
||||||
|
False, '/headers', True, False, None, False, None, None, {}))
|
||||||
|
data = ydl.urlopen(sanitized_Request(
|
||||||
|
self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')
|
||||||
|
self.assertNotIn('Cookie: test=ytdlp', data)
|
||||||
|
self.assertIn('Cookie: test=test', data)
|
||||||
|
|
||||||
|
def test_no_compression_compat_header(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
data = ydl.urlopen(
|
||||||
|
sanitized_Request(
|
||||||
|
self._test_url('headers'),
|
||||||
|
headers={'Youtubedl-no-compression': True})).read()
|
||||||
|
self.assertIn(b'Accept-Encoding: identity', data)
|
||||||
|
self.assertNotIn(b'youtubedl-no-compression', data.lower())
|
||||||
|
|
||||||
|
def test_gzip_trailing_garbage(self):
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')
|
||||||
|
self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
|
||||||
|
|
||||||
|
def __test_compression(self, encoding):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
res = ydl.urlopen(
|
||||||
|
sanitized_Request(
|
||||||
|
self._test_url('content-encoding'),
|
||||||
|
headers={'ytdl-encoding': encoding}))
|
||||||
|
# decoded encodings are removed: only check for valid decompressed data
|
||||||
|
self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
|
||||||
|
|
||||||
|
@unittest.skipUnless(brotli, 'brotli support is not installed')
|
||||||
|
def test_brotli(self):
|
||||||
|
self.__test_compression('br')
|
||||||
|
|
||||||
|
def test_deflate(self):
|
||||||
|
self.__test_compression('deflate')
|
||||||
|
|
||||||
|
def test_gzip(self):
|
||||||
|
self.__test_compression('gzip')
|
||||||
|
|
||||||
|
def test_multiple_encodings(self):
|
||||||
|
# https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
|
||||||
|
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||||
|
self.__test_compression(pair)
|
||||||
|
|
||||||
|
def test_unsupported_encoding(self):
|
||||||
|
# it should return the raw content
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
res = ydl.urlopen(
|
||||||
|
sanitized_Request(
|
||||||
|
self._test_url('content-encoding'),
|
||||||
|
headers={'ytdl-encoding': 'unsupported'}))
|
||||||
|
self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
|
||||||
|
self.assertEqual(res.read(), b'raw')
|
||||||
|
|
||||||
|
def test_remove_dot_segments(self):
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('a/b/./../../headers')))
|
||||||
|
self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
|
||||||
|
|
||||||
|
res = ydl.urlopen(sanitized_Request(self._test_url('redirect_dotsegments')))
|
||||||
|
self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')
|
||||||
|
|
||||||
|
|
||||||
def _build_proxy_handler(name):
|
def _build_proxy_handler(name):
|
||||||
@ -109,7 +515,7 @@ def _build_proxy_handler(name):
|
|||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
|
self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))
|
||||||
return HTTPTestRequestHandler
|
return HTTPTestRequestHandler
|
||||||
|
|
||||||
|
|
||||||
@ -129,10 +535,30 @@ class TestProxy(unittest.TestCase):
|
|||||||
self.geo_proxy_thread.daemon = True
|
self.geo_proxy_thread.daemon = True
|
||||||
self.geo_proxy_thread.start()
|
self.geo_proxy_thread.start()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
|
||||||
|
def closer(svr):
|
||||||
|
def _closer():
|
||||||
|
svr.shutdown()
|
||||||
|
svr.server_close()
|
||||||
|
return _closer
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.proxy))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.proxy_thread.join(2.0)
|
||||||
|
|
||||||
|
shutdown_thread = threading.Thread(target=closer(self.geo_proxy))
|
||||||
|
shutdown_thread.start()
|
||||||
|
self.geo_proxy_thread.join(2.0)
|
||||||
|
|
||||||
|
def _test_proxy(self, host='127.0.0.1', port=None):
|
||||||
|
return '{0}:{1}'.format(
|
||||||
|
host, port if port is not None else self.port)
|
||||||
|
|
||||||
def test_proxy(self):
|
def test_proxy(self):
|
||||||
geo_proxy = '127.0.0.1:{0}'.format(self.geo_port)
|
geo_proxy = self._test_proxy(port=self.geo_port)
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
'proxy': self._test_proxy(),
|
||||||
'geo_verification_proxy': geo_proxy,
|
'geo_verification_proxy': geo_proxy,
|
||||||
})
|
})
|
||||||
url = 'http://foo.com/bar'
|
url = 'http://foo.com/bar'
|
||||||
@ -146,7 +572,7 @@ class TestProxy(unittest.TestCase):
|
|||||||
|
|
||||||
def test_proxy_with_idn(self):
|
def test_proxy_with_idn(self):
|
||||||
ydl = YoutubeDL({
|
ydl = YoutubeDL({
|
||||||
'proxy': '127.0.0.1:{0}'.format(self.port),
|
'proxy': self._test_proxy(),
|
||||||
})
|
})
|
||||||
url = 'http://中文.tw/'
|
url = 'http://中文.tw/'
|
||||||
response = ydl.urlopen(url).read().decode('utf-8')
|
response = ydl.urlopen(url).read().decode('utf-8')
|
||||||
@ -154,5 +580,25 @@ class TestProxy(unittest.TestCase):
|
|||||||
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
|
||||||
|
|
||||||
|
|
||||||
|
class TestFileURL(unittest.TestCase):
|
||||||
|
# See https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||||
|
def test_file_urls(self):
|
||||||
|
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||||
|
tf.write(b'foobar')
|
||||||
|
tf.close()
|
||||||
|
url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))
|
||||||
|
with FakeYDL() as ydl:
|
||||||
|
self.assertRaisesRegexp(
|
||||||
|
compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)
|
||||||
|
# not yet implemented
|
||||||
|
"""
|
||||||
|
with FakeYDL({'enable_file_urls': True}) as ydl:
|
||||||
|
res = ydl.urlopen(url)
|
||||||
|
self.assertEqual(res.read(), b'foobar')
|
||||||
|
res.close()
|
||||||
|
"""
|
||||||
|
os.unlink(tf.name)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -11,144 +11,146 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from youtube_dl.compat import compat_str
|
||||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||||
|
|
||||||
|
NaN = object()
|
||||||
|
|
||||||
|
|
||||||
class TestJSInterpreter(unittest.TestCase):
|
class TestJSInterpreter(unittest.TestCase):
|
||||||
|
def _test(self, jsi_or_code, expected, func='f', args=()):
|
||||||
|
if isinstance(jsi_or_code, compat_str):
|
||||||
|
jsi_or_code = JSInterpreter(jsi_or_code)
|
||||||
|
got = jsi_or_code.call_function(func, *args)
|
||||||
|
if expected is NaN:
|
||||||
|
self.assertTrue(math.isnan(got), '{0} is not NaN'.format(got))
|
||||||
|
else:
|
||||||
|
self.assertEqual(got, expected)
|
||||||
|
|
||||||
def test_basic(self):
|
def test_basic(self):
|
||||||
jsi = JSInterpreter('function x(){;}')
|
jsi = JSInterpreter('function f(){;}')
|
||||||
self.assertEqual(jsi.call_function('x'), None)
|
self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
|
||||||
|
self._test(jsi, None)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x3(){return 42;}')
|
self._test('function f(){return 42;}', 42)
|
||||||
self.assertEqual(jsi.call_function('x3'), 42)
|
self._test('function f(){42}', None)
|
||||||
|
self._test('var f = function(){return 42;}', 42)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x3(){42}')
|
def test_add(self):
|
||||||
self.assertEqual(jsi.call_function('x3'), None)
|
self._test('function f(){return 42 + 7;}', 49)
|
||||||
|
self._test('function f(){return 42 + undefined;}', NaN)
|
||||||
|
self._test('function f(){return 42 + null;}', 42)
|
||||||
|
|
||||||
jsi = JSInterpreter('var x5 = function(){return 42;}')
|
def test_sub(self):
|
||||||
self.assertEqual(jsi.call_function('x5'), 42)
|
self._test('function f(){return 42 - 7;}', 35)
|
||||||
|
self._test('function f(){return 42 - undefined;}', NaN)
|
||||||
|
self._test('function f(){return 42 - null;}', 42)
|
||||||
|
|
||||||
|
def test_mul(self):
|
||||||
|
self._test('function f(){return 42 * 7;}', 294)
|
||||||
|
self._test('function f(){return 42 * undefined;}', NaN)
|
||||||
|
self._test('function f(){return 42 * null;}', 0)
|
||||||
|
|
||||||
|
def test_div(self):
|
||||||
|
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
||||||
|
self._test(jsi, NaN, args=(0, 0))
|
||||||
|
self._test(jsi, NaN, args=(JS_Undefined, 1))
|
||||||
|
self._test(jsi, float('inf'), args=(2, 0))
|
||||||
|
self._test(jsi, 0, args=(0, 3))
|
||||||
|
|
||||||
|
def test_mod(self):
|
||||||
|
self._test('function f(){return 42 % 7;}', 0)
|
||||||
|
self._test('function f(){return 42 % 0;}', NaN)
|
||||||
|
self._test('function f(){return 42 % undefined;}', NaN)
|
||||||
|
|
||||||
|
def test_exp(self):
|
||||||
|
self._test('function f(){return 42 ** 2;}', 1764)
|
||||||
|
self._test('function f(){return 42 ** undefined;}', NaN)
|
||||||
|
self._test('function f(){return 42 ** null;}', 1)
|
||||||
|
self._test('function f(){return undefined ** 42;}', NaN)
|
||||||
|
|
||||||
def test_calc(self):
|
def test_calc(self):
|
||||||
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
|
self._test('function f(a){return 2*a+1;}', 7, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x4', 3), 7)
|
|
||||||
|
|
||||||
def test_empty_return(self):
|
def test_empty_return(self):
|
||||||
jsi = JSInterpreter('function f(){return; y()}')
|
self._test('function f(){return; y()}', None)
|
||||||
self.assertEqual(jsi.call_function('f'), None)
|
|
||||||
|
|
||||||
def test_morespace(self):
|
def test_morespace(self):
|
||||||
jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }')
|
self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x', 3), 7)
|
self._test('function f () { x = 2 ; return x; }', 2)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f () { x = 2 ; return x; }')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 2)
|
|
||||||
|
|
||||||
def test_strange_chars(self):
|
def test_strange_chars(self):
|
||||||
jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }')
|
self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
|
||||||
self.assertEqual(jsi.call_function('$_xY1', 20), 21)
|
21, args=[20], func='$_xY1')
|
||||||
|
|
||||||
def test_operators(self):
|
def test_operators(self):
|
||||||
jsi = JSInterpreter('function f(){return 1 << 5;}')
|
self._test('function f(){return 1 << 5;}', 32)
|
||||||
self.assertEqual(jsi.call_function('f'), 32)
|
self._test('function f(){return 2 ** 5}', 32)
|
||||||
|
self._test('function f(){return 19 & 21;}', 17)
|
||||||
jsi = JSInterpreter('function f(){return 2 ** 5}')
|
self._test('function f(){return 11 >> 2;}', 2)
|
||||||
self.assertEqual(jsi.call_function('f'), 32)
|
self._test('function f(){return []? 2+3: 4;}', 5)
|
||||||
|
self._test('function f(){return 1 == 2}', False)
|
||||||
jsi = JSInterpreter('function f(){return 19 & 21;}')
|
self._test('function f(){return 0 && 1 || 2;}', 2)
|
||||||
self.assertEqual(jsi.call_function('f'), 17)
|
self._test('function f(){return 0 ?? 42;}', 0)
|
||||||
|
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
||||||
jsi = JSInterpreter('function f(){return 11 >> 2;}')
|
# https://github.com/ytdl-org/youtube-dl/issues/32815
|
||||||
self.assertEqual(jsi.call_function('f'), 2)
|
self._test('function f(){return 0 - 7 * - 6;}', 42)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 5)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 1 == 2}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), False)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 2)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 0)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
|
|
||||||
self.assertFalse(jsi.call_function('f'))
|
|
||||||
|
|
||||||
def test_array_access(self):
|
def test_array_access(self):
|
||||||
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
|
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
||||||
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
|
|
||||||
|
|
||||||
def test_parens(self):
|
def test_parens(self):
|
||||||
jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}')
|
self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7)
|
||||||
self.assertEqual(jsi.call_function('f'), 7)
|
self._test('function f(){return (1 + 2) * 3;}', 9)
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 9)
|
|
||||||
|
|
||||||
def test_quotes(self):
|
def test_quotes(self):
|
||||||
jsi = JSInterpreter(r'function f(){return "a\"\\("}')
|
self._test(r'function f(){return "a\"\\("}', r'a"\(')
|
||||||
self.assertEqual(jsi.call_function('f'), r'a"\(')
|
|
||||||
|
|
||||||
def test_assignments(self):
|
def test_assignments(self):
|
||||||
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
|
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
|
||||||
self.assertEqual(jsi.call_function('f'), 31)
|
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||||
|
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||||
jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), 51)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
|
|
||||||
self.assertEqual(jsi.call_function('f'), -11)
|
|
||||||
|
|
||||||
|
@unittest.skip('Not yet fully implemented')
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
'Skipping: Not yet fully implemented'
|
self._test('''
|
||||||
return
|
function f() {
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() {
|
|
||||||
var x = /* 1 + */ 2;
|
var x = /* 1 + */ 2;
|
||||||
var y = /* 30
|
var y = /* 30
|
||||||
* 40 */ 50;
|
* 40 */ 50;
|
||||||
return x + y;
|
return x + y;
|
||||||
}
|
}
|
||||||
''')
|
''', 52)
|
||||||
self.assertEqual(jsi.call_function('x'), 52)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function f() {
|
function f() {
|
||||||
var x = "/*";
|
var x = "/*";
|
||||||
var y = 1 /* comment */ + 2;
|
var y = 1 /* comment */ + 2;
|
||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
''')
|
''', 3)
|
||||||
self.assertEqual(jsi.call_function('f'), 3)
|
|
||||||
|
|
||||||
def test_precedence(self):
|
def test_precedence(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
var a = [10, 20, 30, 40, 50];
|
var a = [10, 20, 30, 40, 50];
|
||||||
var b = 6;
|
var b = 6;
|
||||||
a[0]=a[b%a.length];
|
a[0]=a[b%a.length];
|
||||||
return a;
|
return a;
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
|
''', [20, 20, 30, 40, 50])
|
||||||
|
|
||||||
def test_builtins(self):
|
def test_builtins(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return NaN }', NaN)
|
||||||
function x() { return NaN }
|
|
||||||
''')
|
|
||||||
self.assertTrue(math.isnan(jsi.call_function('x')))
|
|
||||||
|
|
||||||
def test_Date(self):
|
def test_Date(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
|
||||||
function x(dt) { return new Date(dt) - 0; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
|
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
|
||||||
# date format m/d/y
|
# date format m/d/y
|
||||||
self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000)
|
self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
|
||||||
|
|
||||||
# epoch 0
|
# epoch 0
|
||||||
self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0)
|
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
|
||||||
|
|
||||||
def test_call(self):
|
def test_call(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
@ -156,179 +158,115 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
function y(a) { return x() + (a?a:0); }
|
function y(a) { return x() + (a?a:0); }
|
||||||
function z() { return y(3); }
|
function z() { return y(3); }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('z'), 5)
|
self._test(jsi, 5, func='z')
|
||||||
self.assertEqual(jsi.call_function('y'), 2)
|
self._test(jsi, 2, func='y')
|
||||||
|
|
||||||
def test_if(self):
|
def test_if(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
let a = 9;
|
let a = 9;
|
||||||
if (0==0) {a++}
|
if (0==0) {a++}
|
||||||
return a
|
return a
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
if (0==0) {return 10}
|
if (0==0) {return 10}
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
if (0!=0) {return 1}
|
if (0!=0) {return 1}
|
||||||
else {return 10}
|
else {return 10}
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
""" # Unsupported
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() {
|
|
||||||
if (0!=0) return 1;
|
|
||||||
else {return 10}
|
|
||||||
}''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_elseif(self):
|
def test_elseif(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
if (0!=0) {return 1}
|
if (0!=0) {return 1}
|
||||||
else if (1==0) {return 2}
|
else if (1==0) {return 2}
|
||||||
else {return 10}
|
else {return 10}
|
||||||
}''')
|
}
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
''', 10)
|
||||||
|
|
||||||
""" # Unsupported
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() {
|
|
||||||
if (0!=0) return 1;
|
|
||||||
else if (1==0) {return 2}
|
|
||||||
else {return 10}
|
|
||||||
}''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
# etc
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_for_loop(self):
|
def test_for_loop(self):
|
||||||
# function x() { a=0; for (i=0; i-10; i++) {a++} a }
|
self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10)
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { a=0; for (i=0; i-10; i++) {a++} return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
|
|
||||||
def test_while_loop(self):
|
def test_while_loop(self):
|
||||||
# function x() { a=0; while (a<10) {a++} a }
|
self._test('function f() { a=0; while (a<10) {a++} return a }', 10)
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { a=0; while (a<10) {a++} return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
|
|
||||||
def test_switch(self):
|
def test_switch(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x(f) { switch(f){
|
function f(x) { switch(x){
|
||||||
case 1:f+=1;
|
case 1:x+=1;
|
||||||
case 2:f+=2;
|
case 2:x+=2;
|
||||||
case 3:f+=3;break;
|
case 3:x+=3;break;
|
||||||
case 4:f+=4;
|
case 4:x+=4;
|
||||||
default:f=0;
|
default:x=0;
|
||||||
} return f }
|
} return x }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x', 1), 7)
|
self._test(jsi, 7, args=[1])
|
||||||
self.assertEqual(jsi.call_function('x', 3), 6)
|
self._test(jsi, 6, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x', 5), 0)
|
self._test(jsi, 0, args=[5])
|
||||||
|
|
||||||
def test_switch_default(self):
|
def test_switch_default(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x(f) { switch(f){
|
function f(x) { switch(x){
|
||||||
case 2: f+=2;
|
case 2: x+=2;
|
||||||
default: f-=1;
|
default: x-=1;
|
||||||
case 5:
|
case 5:
|
||||||
case 6: f+=6;
|
case 6: x+=6;
|
||||||
case 0: break;
|
case 0: break;
|
||||||
case 1: f+=1;
|
case 1: x+=1;
|
||||||
} return f }
|
} return x }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x', 1), 2)
|
self._test(jsi, 2, args=[1])
|
||||||
self.assertEqual(jsi.call_function('x', 5), 11)
|
self._test(jsi, 11, args=[5])
|
||||||
self.assertEqual(jsi.call_function('x', 9), 14)
|
self._test(jsi, 14, args=[9])
|
||||||
|
|
||||||
def test_try(self):
|
def test_try(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { try{return 10} catch(e){return 5} }', 10)
|
||||||
function x() { try{return 10} catch(e){return 5} }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 10)
|
|
||||||
|
|
||||||
def test_catch(self):
|
def test_catch(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { try{throw 10} catch(e){return 5} }', 5)
|
||||||
function x() { try{throw 10} catch(e){return 5} }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
|
|
||||||
def test_finally(self):
|
def test_finally(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { try{throw 10} finally {return 42} }', 42)
|
||||||
function x() { try{throw 10} finally {return 42} }
|
self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42)
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_nested_try(self):
|
def test_nested_try(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {try {
|
function f() {try {
|
||||||
try{throw 10} finally {throw 42}
|
try{throw 10} finally {throw 42}
|
||||||
} catch(e){return 5} }
|
} catch(e){return 5} }
|
||||||
''')
|
''', 5)
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
|
|
||||||
def test_for_loop_continue(self):
|
def test_for_loop_continue(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0)
|
||||||
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 0)
|
|
||||||
|
|
||||||
def test_for_loop_break(self):
|
def test_for_loop_break(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0)
|
||||||
function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 0)
|
|
||||||
|
|
||||||
def test_for_loop_try(self):
|
def test_for_loop_try(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() {
|
function f() {
|
||||||
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
|
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
|
||||||
return 42 }
|
return 42 }
|
||||||
''')
|
''', 42)
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_literal_list(self):
|
def test_literal_list(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7])
|
||||||
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
|
|
||||||
|
|
||||||
def test_comma(self):
|
def test_comma(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { a=5; a -= 1, a+=3; return a }', 7)
|
||||||
function x() { a=5; a -= 1, a+=3; return a }
|
self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
|
||||||
''')
|
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
|
||||||
self.assertEqual(jsi.call_function('x'), 7)
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { a=5; return (a -= 1, a+=3, a); }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 7)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
|
|
||||||
def test_void(self):
|
def test_void(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return void 42; }', None)
|
||||||
function x() { return void 42; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), None)
|
|
||||||
|
|
||||||
def test_return_function(self):
|
def test_return_function(self):
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
@ -337,61 +275,42 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self.assertEqual(jsi.call_function('x')([]), 1)
|
self.assertEqual(jsi.call_function('x')([]), 1)
|
||||||
|
|
||||||
def test_null(self):
|
def test_null(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return null; }', None)
|
||||||
function x() { return null; }
|
self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }',
|
||||||
''')
|
[False, False, False, False])
|
||||||
self.assertIs(jsi.call_function('x'), None)
|
self._test('function f() { return [null >= 0, null <= 0]; }', [True, True])
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [null > 0, null < 0, null == 0, null === 0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [null >= 0, null <= 0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [True, True])
|
|
||||||
|
|
||||||
def test_undefined(self):
|
def test_undefined(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return undefined === undefined; }', True)
|
||||||
function x() { return undefined === undefined; }
|
self._test('function f() { return undefined; }', JS_Undefined)
|
||||||
''')
|
self._test('function f() {return undefined ?? 42; }', 42)
|
||||||
self.assertTrue(jsi.call_function('x'))
|
self._test('function f() { let v; return v; }', JS_Undefined)
|
||||||
|
self._test('function f() { let v; return v**0; }', 1)
|
||||||
|
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
|
||||||
|
[False, False, JS_Undefined, JS_Undefined])
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
self._test('''
|
||||||
function x() { return undefined; }
|
function f() { return [
|
||||||
''')
|
undefined === undefined,
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
undefined == undefined,
|
||||||
|
undefined == null
|
||||||
jsi = JSInterpreter('''
|
]; }
|
||||||
function x() { let v; return v; }
|
''', [True] * 3)
|
||||||
''')
|
self._test('''
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
function f() { return [
|
||||||
|
undefined < undefined,
|
||||||
jsi = JSInterpreter('''
|
undefined > undefined,
|
||||||
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
|
undefined === 0,
|
||||||
''')
|
undefined == 0,
|
||||||
self.assertEqual(jsi.call_function('x'), [True, True, False, False])
|
undefined < 0,
|
||||||
|
undefined > 0,
|
||||||
jsi = JSInterpreter('''
|
undefined >= 0,
|
||||||
function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
|
undefined <= 0,
|
||||||
''')
|
undefined > null,
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
|
undefined < null,
|
||||||
|
undefined === null
|
||||||
jsi = JSInterpreter('''
|
]; }
|
||||||
function x() { return [undefined >= 0, undefined <= 0]; }
|
''', [False] * 11)
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, True, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
|
||||||
@ -399,117 +318,140 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
for y in jsi.call_function('x'):
|
for y in jsi.call_function('x'):
|
||||||
self.assertTrue(math.isnan(y))
|
self.assertTrue(math.isnan(y))
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return v**0; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 1)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 42)
|
|
||||||
|
|
||||||
def test_object(self):
|
def test_object(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { return {}; }', {})
|
||||||
function x() { return {}; }
|
self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0])
|
||||||
''')
|
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
|
||||||
self.assertEqual(jsi.call_function('x'), {})
|
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), [42, 0])
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a; return a?.qq; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
|
||||||
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), JS_Undefined)
|
|
||||||
|
|
||||||
def test_regex(self):
|
def test_regex(self):
|
||||||
jsi = JSInterpreter('''
|
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||||
function x() { let a=/,,[/,913,/](,)}/; }
|
|
||||||
''')
|
|
||||||
self.assertIs(jsi.call_function('x'), None)
|
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
||||||
''')
|
''')
|
||||||
attrs = set(('findall', 'finditer', 'flags', 'groupindex',
|
attrs = set(('findall', 'finditer', 'match', 'scanner', 'search',
|
||||||
'groups', 'match', 'pattern', 'scanner',
|
'split', 'sub', 'subn'))
|
||||||
'search', 'split', 'sub', 'subn'))
|
if sys.version_info >= (2, 7):
|
||||||
self.assertTrue(set(dir(jsi.call_function('x'))) > attrs)
|
# documented for 2.6 but may not be found
|
||||||
|
attrs.update(('flags', 'groupindex', 'groups', 'pattern'))
|
||||||
|
self.assertSetEqual(set(dir(jsi.call_function('x'))) & attrs, attrs)
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
function x() { let a=/,,[/,913,/](,)}/i; return a; }
|
||||||
''')
|
''')
|
||||||
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
jsi = JSInterpreter(r'function f() { let a=/,][}",],()}(\[)/; return a; }')
|
||||||
function x() { let a="data-name".replace("data-", ""); return a }
|
self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)')
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 'name')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
jsi = JSInterpreter(r'function f() { let a=[/[)\\]/]; return a[0]; }')
|
||||||
function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }
|
self.assertEqual(jsi.call_function('f').pattern, r'[)\\]')
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 'name')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
def test_replace(self):
|
||||||
function x() { let a="data-name".replace(/^.+-/, ""); return a; }
|
self._test('function f() { let a="data-name".replace("data-", ""); return a }',
|
||||||
''')
|
'name')
|
||||||
self.assertEqual(jsi.call_function('x'), 'name')
|
self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }',
|
||||||
|
'name')
|
||||||
jsi = JSInterpreter(r'''
|
self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }',
|
||||||
function x() { let a="data-name".replace(/a/g, "o"); return a; }
|
'name')
|
||||||
''')
|
self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }',
|
||||||
self.assertEqual(jsi.call_function('x'), 'doto-nome')
|
'doto-nome')
|
||||||
|
self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }',
|
||||||
jsi = JSInterpreter(r'''
|
'doto-nome')
|
||||||
function x() { let a="data-name".replaceAll("a", "o"); return a; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 'doto-nome')
|
|
||||||
|
|
||||||
jsi = JSInterpreter(r'''
|
|
||||||
function x() { let a=[/[)\\]/]; return a[0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
|
|
||||||
|
|
||||||
""" # fails
|
|
||||||
jsi = JSInterpreter(r'''
|
|
||||||
function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
|
|
||||||
''')
|
|
||||||
self.assertEqual(jsi.call_function('x'), 5)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def test_char_code_at(self):
|
def test_char_code_at(self):
|
||||||
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
|
jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
|
||||||
self.assertEqual(jsi.call_function('x', 0), 116)
|
self._test(jsi, 116, args=[0])
|
||||||
self.assertEqual(jsi.call_function('x', 1), 101)
|
self._test(jsi, 101, args=[1])
|
||||||
self.assertEqual(jsi.call_function('x', 2), 115)
|
self._test(jsi, 115, args=[2])
|
||||||
self.assertEqual(jsi.call_function('x', 3), 116)
|
self._test(jsi, 116, args=[3])
|
||||||
self.assertEqual(jsi.call_function('x', 4), None)
|
self._test(jsi, None, args=[4])
|
||||||
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
|
self._test(jsi, 116, args=['not_a_number'])
|
||||||
|
|
||||||
def test_bitwise_operators_overflow(self):
|
def test_bitwise_operators_overflow(self):
|
||||||
jsi = JSInterpreter('function x(){return -524999584 << 5}')
|
self._test('function f(){return -524999584 << 5}', 379882496)
|
||||||
self.assertEqual(jsi.call_function('x'), 379882496)
|
self._test('function f(){return 1236566549 << 5}', 915423904)
|
||||||
|
|
||||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
def test_bitwise_operators_typecast(self):
|
||||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
# madness
|
||||||
|
self._test('function f(){return null << 5}', 0)
|
||||||
|
self._test('function f(){return undefined >> 5}', 0)
|
||||||
|
self._test('function f(){return 42 << NaN}', 42)
|
||||||
|
self._test('function f(){return 42 << Infinity}', 42)
|
||||||
|
|
||||||
""" # fails so far
|
def test_negative(self):
|
||||||
|
self._test('function f(){return 2 * -2.0 ;}', -4)
|
||||||
|
self._test('function f(){return 2 - - -2 ;}', 0)
|
||||||
|
self._test('function f(){return 2 - - - -2 ;}', 4)
|
||||||
|
self._test('function f(){return 2 - + + - -2;}', 0)
|
||||||
|
self._test('function f(){return 2 + - + - -2;}', 0)
|
||||||
|
|
||||||
|
def test_32066(self):
|
||||||
|
self._test(
|
||||||
|
"function f(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}",
|
||||||
|
70)
|
||||||
|
|
||||||
|
@unittest.skip('Not yet working')
|
||||||
def test_packed(self):
|
def test_packed(self):
|
||||||
jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
|
self._test(
|
||||||
self.assertEqual(jsi.call_function('x', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
|
'''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''',
|
||||||
"""
|
'''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))
|
||||||
|
|
||||||
|
def test_join(self):
|
||||||
|
test_input = list('test')
|
||||||
|
tests = [
|
||||||
|
'function f(a, b){return a.join(b)}',
|
||||||
|
'function f(a, b){return Array.prototype.join.call(a, b)}',
|
||||||
|
'function f(a, b){return Array.prototype.join.apply(a, [b])}',
|
||||||
|
]
|
||||||
|
for test in tests:
|
||||||
|
jsi = JSInterpreter(test)
|
||||||
|
self._test(jsi, 'test', args=[test_input, ''])
|
||||||
|
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||||
|
self._test(jsi, '', args=[[], '-'])
|
||||||
|
|
||||||
|
def test_split(self):
|
||||||
|
test_result = list('test')
|
||||||
|
tests = [
|
||||||
|
'function f(a, b){return a.split(b)}',
|
||||||
|
'function f(a, b){return String.prototype.split.call(a, b)}',
|
||||||
|
'function f(a, b){return String.prototype.split.apply(a, [b])}',
|
||||||
|
]
|
||||||
|
for test in tests:
|
||||||
|
jsi = JSInterpreter(test)
|
||||||
|
self._test(jsi, test_result, args=['test', ''])
|
||||||
|
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||||
|
self._test(jsi, [''], args=['', '-'])
|
||||||
|
self._test(jsi, [], args=['', ''])
|
||||||
|
|
||||||
|
def test_slice(self):
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
|
||||||
|
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
|
||||||
|
self._test('function f(){return "012345678".slice()}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(0)}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(5)}', '5678')
|
||||||
|
self._test('function f(){return "012345678".slice(99)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(-2)}', '78')
|
||||||
|
self._test('function f(){return "012345678".slice(-99)}', '012345678')
|
||||||
|
self._test('function f(){return "012345678".slice(0, 0)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(1, 0)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(0, 1)}', '0')
|
||||||
|
self._test('function f(){return "012345678".slice(3, 6)}', '345')
|
||||||
|
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
|
||||||
|
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||||
|
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -295,6 +295,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
|||||||
def test_allsubtitles(self):
|
def test_allsubtitles(self):
|
||||||
self.DL.params['writesubtitles'] = True
|
self.DL.params['writesubtitles'] = True
|
||||||
self.DL.params['allsubtitles'] = True
|
self.DL.params['allsubtitles'] = True
|
||||||
|
self.DL.params['format'] = 'best/bestvideo'
|
||||||
subtitles = self.getSubtitles()
|
subtitles = self.getSubtitles()
|
||||||
self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
|
self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
|
||||||
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
|
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
|
||||||
|
@ -5,16 +5,18 @@ from __future__ import unicode_literals
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
|
dirn = os.path.dirname
|
||||||
|
|
||||||
|
sys.path.insert(0, dirn(dirn(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from youtube_dl.swfinterp import SWFInterpreter
|
from youtube_dl.swfinterp import SWFInterpreter
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
TEST_DIR = os.path.join(
|
TEST_DIR = os.path.join(
|
||||||
@ -43,7 +45,7 @@ def _make_testfunc(testfile):
|
|||||||
'-static-link-runtime-shared-libraries', as_file])
|
'-static-link-runtime-shared-libraries', as_file])
|
||||||
except OSError as ose:
|
except OSError as ose:
|
||||||
if ose.errno == errno.ENOENT:
|
if ose.errno == errno.ENOENT:
|
||||||
print('mxmlc not found! Skipping test.')
|
self.skipTest('mxmlc not found!')
|
||||||
return
|
return
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@ -51,7 +53,7 @@ def _make_testfunc(testfile):
|
|||||||
swf_content = swf_f.read()
|
swf_content = swf_f.read()
|
||||||
swfi = SWFInterpreter(swf_content)
|
swfi = SWFInterpreter(swf_content)
|
||||||
|
|
||||||
with io.open(as_file, 'r', encoding='utf-8') as as_f:
|
with open(as_file, 'r', encoding='utf-8') as as_f:
|
||||||
as_content = as_f.read()
|
as_content = as_f.read()
|
||||||
|
|
||||||
def _find_spec(key):
|
def _find_spec(key):
|
||||||
|
509
test/test_traversal.py
Normal file
509
test/test_traversal.py
Normal file
@ -0,0 +1,509 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from youtube_dl.traversal import (
|
||||||
|
dict_get,
|
||||||
|
get_first,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
from youtube_dl.compat import (
|
||||||
|
compat_etree_fromstring,
|
||||||
|
compat_http_cookies,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from youtube_dl.utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
_TEST_DATA = {
|
||||||
|
100: 100,
|
||||||
|
1.2: 1.2,
|
||||||
|
'str': 'str',
|
||||||
|
'None': None,
|
||||||
|
'...': Ellipsis,
|
||||||
|
'urls': [
|
||||||
|
{'index': 0, 'url': 'https://www.example.com/0'},
|
||||||
|
{'index': 1, 'url': 'https://www.example.com/1'},
|
||||||
|
],
|
||||||
|
'data': (
|
||||||
|
{'index': 2},
|
||||||
|
{'index': 3},
|
||||||
|
),
|
||||||
|
'dict': {},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info < (3, 0):
|
||||||
|
class _TestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
def assertCountEqual(self, *args, **kwargs):
|
||||||
|
return self.assertItemsEqual(*args, **kwargs)
|
||||||
|
else:
|
||||||
|
_TestCase = unittest.TestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestTraversal(_TestCase):
|
||||||
|
def assertMaybeCountEqual(self, *args, **kwargs):
|
||||||
|
if sys.version_info < (3, 7):
|
||||||
|
# random dict order
|
||||||
|
return self.assertCountEqual(*args, **kwargs)
|
||||||
|
else:
|
||||||
|
return self.assertEqual(*args, **kwargs)
|
||||||
|
|
||||||
|
def test_traverse_obj(self):
|
||||||
|
# instant compat
|
||||||
|
str = compat_str
|
||||||
|
|
||||||
|
# define a pukka Iterable
|
||||||
|
def iter_range(stop):
|
||||||
|
for from_ in range(stop):
|
||||||
|
yield from_
|
||||||
|
|
||||||
|
# Test base functionality
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
||||||
|
msg='allow tuple path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
||||||
|
msg='allow list path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
||||||
|
msg='allow iterable path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
||||||
|
msg='single items should be treated as a path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
||||||
|
|
||||||
|
# Test Ellipsis behavior
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
||||||
|
(item for item in _TEST_DATA.values() if item not in (None, {})),
|
||||||
|
msg='`...` should give all non-discarded values')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
||||||
|
msg='`...` selection for dicts should select all values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
||||||
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
|
msg='nested `...` queries should work')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
|
||||||
|
msg='`...` query result should be flattened')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
|
||||||
|
msg='`...` should accept iterables')
|
||||||
|
|
||||||
|
# Test function as key
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||||
|
[_TEST_DATA['urls']],
|
||||||
|
msg='function as query key should perform a filter based on (key, value)')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
|
||||||
|
msg='exceptions in the query function should be caught')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
||||||
|
msg='function key should accept iterables')
|
||||||
|
if __debug__:
|
||||||
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a: Ellipsis)
|
||||||
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
|
||||||
|
|
||||||
|
# Test set as key (transformation/type, like `expected_type`)
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
|
||||||
|
msg='Function in set should be a transformation')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const',
|
||||||
|
msg='Function in set should always be called')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
|
||||||
|
msg='Type in set should be a type filter')
|
||||||
|
self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'],
|
||||||
|
msg='Multiple types in set should be a type filter')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
|
||||||
|
msg='A single set should be wrapped into a path')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
|
||||||
|
msg='Transformation function should not raise')
|
||||||
|
self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
|
||||||
|
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
|
||||||
|
msg='Function in set should be a transformation')
|
||||||
|
if __debug__:
|
||||||
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, set())
|
||||||
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
|
traverse_obj(_TEST_DATA, set((str.upper, str)))
|
||||||
|
|
||||||
|
# Test `slice` as a key
|
||||||
|
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
||||||
|
msg='slice on a dictionary should not throw')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
|
||||||
|
# Test alternative paths
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||||
|
msg='multiple `paths` should be treated as alternative paths')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
||||||
|
msg='alternatives should exit early')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
||||||
|
msg='alternatives should return `default` if exhausted')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
|
||||||
|
msg='alternatives should track their own branching return')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
|
||||||
|
msg='alternatives on empty objects should search further')
|
||||||
|
|
||||||
|
# Test branch and path nesting
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
||||||
|
msg='tuple as key should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
||||||
|
msg='list as key should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
||||||
|
msg='double nesting in path should be treated as paths')
|
||||||
|
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
||||||
|
msg='do not fail early on branching')
|
||||||
|
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
||||||
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
|
msg='triple nesting in path should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
|
||||||
|
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||||
|
msg='ellipsis as branch path start gets flattened')
|
||||||
|
|
||||||
|
# Test dictionary as key
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
||||||
|
msg='dict key should result in a dict with the same keys')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
||||||
|
{0: 'https://www.example.com/0'},
|
||||||
|
msg='dict key should allow paths')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
||||||
|
{0: ['https://www.example.com/0']},
|
||||||
|
msg='tuple in dict path should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
||||||
|
{0: ['https://www.example.com/0']},
|
||||||
|
msg='double nesting in dict path should be treated as paths')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
||||||
|
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
||||||
|
msg='triple nesting in dict path should be treated as branches')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
||||||
|
msg='remove `None` values when top level dict key fails')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='use `default` if key fails and `default`')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
|
||||||
|
msg='remove empty values when dict key')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='use `default` when dict key and a default')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
|
||||||
|
msg='remove empty values when nested dict key fails')
|
||||||
|
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
|
||||||
|
msg='default to dict if pruned')
|
||||||
|
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
||||||
|
msg='default to dict if pruned and default is given')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
|
||||||
|
msg='use nested `default` when nested dict key fails and `default`')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
|
||||||
|
msg='remove key if branch in dict key not successful')
|
||||||
|
|
||||||
|
# Testing default parameter behavior
|
||||||
|
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
||||||
|
msg='default value should be `None`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
|
||||||
|
msg='chained fails should result in default')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
||||||
|
msg='should not short cirquit on `None`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
||||||
|
msg='invalid dict key should result in `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
||||||
|
msg='`None` is a deliberate sentinel and should become `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
||||||
|
msg='`IndexError` should result in `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
|
||||||
|
msg='if branched but not successful return `default` if defined, not `[]`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
|
||||||
|
msg='if branched but not successful return `default` even if `default` is `None`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
|
||||||
|
msg='if branched but not successful return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
||||||
|
msg='if branched but object is empty return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj(None, Ellipsis), [],
|
||||||
|
msg='if branched but object is `None` return `[]`, not `default`')
|
||||||
|
self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
|
||||||
|
msg='if branched but state is `None` return `[]`, not `default`')
|
||||||
|
|
||||||
|
branching_paths = [
|
||||||
|
('fail', Ellipsis),
|
||||||
|
(Ellipsis, 'fail'),
|
||||||
|
100 * ('fail',) + (Ellipsis,),
|
||||||
|
(Ellipsis,) + 100 * ('fail',),
|
||||||
|
]
|
||||||
|
for branching_path in branching_paths:
|
||||||
|
self.assertEqual(traverse_obj({}, branching_path), [],
|
||||||
|
msg='if branched but state is `None`, return `[]` (not `default`)')
|
||||||
|
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
|
||||||
|
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
|
||||||
|
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
|
||||||
|
msg='if branching in last alternative and previous did match, return single value')
|
||||||
|
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
|
||||||
|
msg='if branching in first alternative and non-branching path does match, return single value')
|
||||||
|
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
|
||||||
|
msg='if branching in first alternative and non-branching path does not match, return `default`')
|
||||||
|
|
||||||
|
# Testing expected_type behavior
|
||||||
|
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
|
||||||
|
'str', msg='accept matching `expected_type` type')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
|
||||||
|
None, msg='reject non-matching `expected_type` type')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
|
||||||
|
'0', msg='transform type using type function')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
|
||||||
|
None, msg='wrap expected_type function in try_call')
|
||||||
|
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
|
||||||
|
['str'], msg='eliminate items that expected_type fails on')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
|
||||||
|
{0: 100}, msg='type as expected_type should filter dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
|
||||||
|
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
|
||||||
|
1, msg='expected_type should not filter non-final dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
|
||||||
|
{0: {0: 100}}, msg='expected_type should transform deep dict values')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
|
||||||
|
[{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
|
||||||
|
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
|
||||||
|
[4], msg='expected_type regression for type matching in tuple branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
|
||||||
|
[], msg='expected_type regression for type matching in dict result')
|
||||||
|
|
||||||
|
# Test get_all behavior
|
||||||
|
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||||
|
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
|
||||||
|
msg='if not `get_all`, return only first matching value')
|
||||||
|
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
|
||||||
|
msg='do not overflatten if not `get_all`')
|
||||||
|
|
||||||
|
# Test casesense behavior
|
||||||
|
_CASESENSE_DATA = {
|
||||||
|
'KeY': 'value0',
|
||||||
|
0: {
|
||||||
|
'KeY': 'value1',
|
||||||
|
0: {'KeY': 'value2'},
|
||||||
|
},
|
||||||
|
# FULLWIDTH LATIN CAPITAL LETTER K
|
||||||
|
'\uff2bey': 'value3',
|
||||||
|
}
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
|
||||||
|
msg='dict keys should be case sensitive unless `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
|
||||||
|
casesense=False), 'value0',
|
||||||
|
msg='allow non matching key case if `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
|
||||||
|
casesense=False), 'value3',
|
||||||
|
msg='allow non matching Unicode key case if `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
|
||||||
|
casesense=False), ['value1'],
|
||||||
|
msg='allow non matching key case in branch if `casesense`')
|
||||||
|
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
|
||||||
|
casesense=False), ['value2'],
|
||||||
|
msg='allow non matching key case in branch path if `casesense`')
|
||||||
|
|
||||||
|
# Test traverse_string behavior
|
||||||
|
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
|
||||||
|
msg='do not traverse into string if not `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
|
||||||
|
_traverse_string=True), 's',
|
||||||
|
msg='traverse into string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
|
||||||
|
_traverse_string=True), '.',
|
||||||
|
msg='traverse into converted data if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
||||||
|
_traverse_string=True), 'str',
|
||||||
|
msg='`...` should result in string (same value) if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
|
||||||
|
_traverse_string=True), 'sr',
|
||||||
|
msg='`slice` should result in string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
|
||||||
|
_traverse_string=True), 'str',
|
||||||
|
msg='function should result in string if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||||
|
_traverse_string=True), ['s', 'r'],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
|
||||||
|
# Test re.Match as input obj
|
||||||
|
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
|
||||||
|
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
|
||||||
|
msg='`...` on a `re.Match` should give its `groups()`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
|
||||||
|
msg='function on a `re.Match` should give groupno, value starting at 0')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'group'), '3',
|
||||||
|
msg='str key on a `re.Match` should give group with that name')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 2), '3',
|
||||||
|
msg='int key on a `re.Match` should give group with that name')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
|
||||||
|
msg='str key on a `re.Match` should respect casesense')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'fail'), None,
|
||||||
|
msg='failing str key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
|
||||||
|
msg='failing str key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, 8), None,
|
||||||
|
msg='failing int key on a `re.Match` should return `default`')
|
||||||
|
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||||
|
msg='function on a `re.Match` should give group name as well')
|
||||||
|
|
||||||
|
# Test xml.etree.ElementTree.Element as input obj
|
||||||
|
etree = compat_etree_fromstring('''<?xml version="1.0"?>
|
||||||
|
<data>
|
||||||
|
<country name="Liechtenstein">
|
||||||
|
<rank>1</rank>
|
||||||
|
<year>2008</year>
|
||||||
|
<gdppc>141100</gdppc>
|
||||||
|
<neighbor name="Austria" direction="E"/>
|
||||||
|
<neighbor name="Switzerland" direction="W"/>
|
||||||
|
</country>
|
||||||
|
<country name="Singapore">
|
||||||
|
<rank>4</rank>
|
||||||
|
<year>2011</year>
|
||||||
|
<gdppc>59900</gdppc>
|
||||||
|
<neighbor name="Malaysia" direction="N"/>
|
||||||
|
</country>
|
||||||
|
<country name="Panama">
|
||||||
|
<rank>68</rank>
|
||||||
|
<year>2011</year>
|
||||||
|
<gdppc>13600</gdppc>
|
||||||
|
<neighbor name="Costa Rica" direction="W"/>
|
||||||
|
<neighbor name="Colombia" direction="E"/>
|
||||||
|
</country>
|
||||||
|
</data>''')
|
||||||
|
self.assertEqual(traverse_obj(etree, ''), etree,
|
||||||
|
msg='empty str key should return the element itself')
|
||||||
|
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
|
||||||
|
msg='str key should return all children with that tag name')
|
||||||
|
self.assertEqual(traverse_obj(etree, Ellipsis), list(etree),
|
||||||
|
msg='`...` as key should return all children')
|
||||||
|
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
|
||||||
|
msg='function as key should get element as value')
|
||||||
|
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
|
||||||
|
msg='function as key should get index as key')
|
||||||
|
self.assertEqual(traverse_obj(etree, 0), etree[0],
|
||||||
|
msg='int key should return the nth child')
|
||||||
|
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
|
||||||
|
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
|
||||||
|
msg='`@<attribute>` at end of path should give that attribute')
|
||||||
|
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
|
||||||
|
msg='`@<nonexistent>` at end of path should give `None`')
|
||||||
|
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
|
||||||
|
msg='`@` should give the full attribute dict')
|
||||||
|
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
|
||||||
|
msg='`text()` at end of path should give the inner text')
|
||||||
|
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
|
||||||
|
msg='full python xpath features should be supported')
|
||||||
|
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
|
||||||
|
msg='special transformations should act on current element')
|
||||||
|
self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100],
|
||||||
|
msg='special transformations should act on current element')
|
||||||
|
|
||||||
|
def test_traversal_unbranching(self):
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2],
|
||||||
|
msg='`all` should give all results as list')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100,
|
||||||
|
msg='`any` should give the first result')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100],
|
||||||
|
msg='`all` should give list if non branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100,
|
||||||
|
msg='`any` should give single item if non branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100],
|
||||||
|
msg='`all` should filter `None` and empty dict')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100,
|
||||||
|
msg='`any` should filter `None` and empty dict')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [{
|
||||||
|
'all': [('dict', 'None', 100, 1.2), all],
|
||||||
|
'any': [('dict', 'None', 100, 1.2), any],
|
||||||
|
}]), {'all': [100, 1.2], 'any': 100},
|
||||||
|
msg='`all`/`any` should apply to each dict path separately')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [{
|
||||||
|
'all': [('dict', 'None', 100, 1.2), all],
|
||||||
|
'any': [('dict', 'None', 100, 1.2), any],
|
||||||
|
}], get_all=False), {'all': [100, 1.2], 'any': 100},
|
||||||
|
msg='`all`/`any` should apply to dict regardless of `get_all`')
|
||||||
|
self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None,
|
||||||
|
msg='`all` should reset branching status')
|
||||||
|
self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None,
|
||||||
|
msg='`any` should reset branching status')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2],
|
||||||
|
msg='`all` should allow further branching')
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1],
|
||||||
|
msg='`any` should allow further branching')
|
||||||
|
|
||||||
|
def test_traversal_morsel(self):
    """Check that `traverse_obj` can read a cookie `Morsel` like a mapping.

    Every attribute stored on the morsel, plus its key and value, must be
    reachable by name, by `...` and by a function key, and the morsel
    itself must be handed back unchanged when selected with `None`.
    """
    attributes = {
        'expires': 'a',
        'path': 'b',
        'comment': 'c',
        'domain': 'd',
        'max-age': 'e',
        'secure': 'f',
        'httponly': 'g',
        'version': 'h',
        'samesite': 'i',
    }
    # SameSite added in Py3.8, breaks .update for 3.5-3.7
    if sys.version_info < (3, 8):
        attributes.pop('samesite')

    cookie = compat_http_cookies.Morsel()
    cookie.set(str('item_key'), 'item_value', 'coded_value')
    cookie.update(attributes)

    # the key and value passed to `set()` are expected to be reachable too
    attributes['key'] = str('item_key')
    attributes['value'] = 'item_value'
    expected = dict((str(name), content) for name, content in attributes.items())
    # make test pass even without ordered dict
    expected_values = set(expected.values())

    for name in expected:
        self.assertEqual(traverse_obj(cookie, name), expected[name],
                         msg='Morsel should provide access to all values')

    all_by_ellipsis = set(traverse_obj(cookie, Ellipsis))
    self.assertEqual(all_by_ellipsis, expected_values,
                     msg='`...` should yield all values')

    all_by_function = set(traverse_obj(cookie, lambda k, v: True))
    self.assertEqual(all_by_function, expected_values,
                     msg='function key should yield all values')

    self.assertIs(traverse_obj(cookie, [(None,), any]), cookie,
                  msg='Morsel should not be implicitly changed to dict on usage')
|
||||||
|
|
||||||
|
def test_get_first(self):
    """`get_first` should skip the `None` hit and return the next match."""
    candidates = [{'a': None}, {'a': 'spam'}]
    self.assertEqual(get_first(candidates, 'a'), 'spam')
|
||||||
|
|
||||||
|
def test_dict_get(self):
    """Exercise `dict_get` with single keys, key tuples, defaults and falsy values."""
    FALSE_VALUES = {
        'none': None,
        'false': False,
        'zero': 0,
        'empty_string': '',
        'empty_list': [],
    }
    data = FALSE_VALUES.copy()
    data['a'] = 42

    # lookups without an explicit default
    for key_or_keys, expected in (
        ('a', 42),
        ('b', None),
        (('a', ), 42),
        (('b', 'a', ), 42),
        (('b', 'c', 'a', 'd', ), 42),
        (('b', 'c', ), None),
    ):
        self.assertEqual(dict_get(data, key_or_keys), expected)

    # missing keys fall back to the supplied default
    for key_or_keys in ('b', ('b', 'c', )):
        self.assertEqual(dict_get(data, key_or_keys, 42), 42)

    # falsy values are skipped unless `skip_false_values` is turned off
    for name in FALSE_VALUES:
        candidates = ('b', 'c', name, )
        self.assertEqual(dict_get(data, candidates), None)
        self.assertEqual(
            dict_get(data, candidates, skip_false_values=False),
            FALSE_VALUES[name])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -2,19 +2,21 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
import io
|
dirn = os.path.dirname
|
||||||
import re
|
|
||||||
|
|
||||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
rootDir = dirn(dirn(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
sys.path.insert(0, rootDir)
|
||||||
|
|
||||||
IGNORED_FILES = [
|
IGNORED_FILES = [
|
||||||
'setup.py', # http://bugs.python.org/issue13943
|
'setup.py', # http://bugs.python.org/issue13943
|
||||||
'conf.py',
|
'conf.py',
|
||||||
'buildserver.py',
|
'buildserver.py',
|
||||||
|
'get-pip.py',
|
||||||
]
|
]
|
||||||
|
|
||||||
IGNORED_DIRS = [
|
IGNORED_DIRS = [
|
||||||
@ -23,6 +25,7 @@ IGNORED_DIRS = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
from test.helper import assertRegexpMatches
|
from test.helper import assertRegexpMatches
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
class TestUnicodeLiterals(unittest.TestCase):
|
class TestUnicodeLiterals(unittest.TestCase):
|
||||||
@ -40,7 +43,7 @@ class TestUnicodeLiterals(unittest.TestCase):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
fn = os.path.join(dirpath, basename)
|
fn = os.path.join(dirpath, basename)
|
||||||
with io.open(fn, encoding='utf-8') as inf:
|
with open(fn, encoding='utf-8') as inf:
|
||||||
code = inf.read()
|
code = inf.read()
|
||||||
|
|
||||||
if "'" not in code and '"' not in code:
|
if "'" not in code and '"' not in code:
|
||||||
|
@ -14,13 +14,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import re
|
import types
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from youtube_dl.utils import (
|
from youtube_dl.utils import (
|
||||||
|
_UnsafeExtensionError,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
encode_base_n,
|
base_url,
|
||||||
caesar,
|
caesar,
|
||||||
clean_html,
|
clean_html,
|
||||||
clean_podcast_url,
|
clean_podcast_url,
|
||||||
@ -28,11 +29,12 @@ from youtube_dl.utils import (
|
|||||||
DateRange,
|
DateRange,
|
||||||
detect_exe_version,
|
detect_exe_version,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
encode_base_n,
|
||||||
encode_compat_str,
|
encode_compat_str,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
escape_url,
|
escape_url,
|
||||||
|
expand_path,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
@ -42,7 +44,6 @@ from youtube_dl.utils import (
|
|||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
get_elements_by_class,
|
get_elements_by_class,
|
||||||
get_elements_by_attribute,
|
get_elements_by_attribute,
|
||||||
get_first,
|
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
@ -51,6 +52,7 @@ from youtube_dl.utils import (
|
|||||||
js_to_json,
|
js_to_json,
|
||||||
LazyList,
|
LazyList,
|
||||||
limit_length,
|
limit_length,
|
||||||
|
lowercase_escape,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
month_by_name,
|
month_by_name,
|
||||||
@ -59,24 +61,26 @@ from youtube_dl.utils import (
|
|||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
|
parse_bitrate,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
|
parse_codecs,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
parse_bitrate,
|
parse_qs,
|
||||||
pkcs1pad,
|
pkcs1pad,
|
||||||
read_batch_urls,
|
|
||||||
sanitize_filename,
|
|
||||||
sanitize_path,
|
|
||||||
sanitize_url,
|
|
||||||
expand_path,
|
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
replace_extension,
|
read_batch_urls,
|
||||||
remove_start,
|
remove_start,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_quotes,
|
remove_quotes,
|
||||||
|
replace_extension,
|
||||||
rot47,
|
rot47,
|
||||||
|
sanitize_filename,
|
||||||
|
sanitize_path,
|
||||||
|
sanitize_url,
|
||||||
|
sanitized_Request,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
@ -84,17 +88,14 @@ from youtube_dl.utils import (
|
|||||||
strip_or_none,
|
strip_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
timeconvert,
|
timeconvert,
|
||||||
traverse_obj,
|
|
||||||
try_call,
|
try_call,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
uppercase_escape,
|
uppercase_escape,
|
||||||
lowercase_escape,
|
|
||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
base_url,
|
|
||||||
urljoin,
|
urljoin,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urshift,
|
urshift,
|
||||||
@ -112,7 +113,7 @@ from youtube_dl.utils import (
|
|||||||
cli_option,
|
cli_option,
|
||||||
cli_valueless_option,
|
cli_valueless_option,
|
||||||
cli_bool_option,
|
cli_bool_option,
|
||||||
parse_codecs,
|
YoutubeDLHandler,
|
||||||
)
|
)
|
||||||
from youtube_dl.compat import (
|
from youtube_dl.compat import (
|
||||||
compat_chr,
|
compat_chr,
|
||||||
@ -122,16 +123,11 @@ from youtube_dl.compat import (
|
|||||||
compat_setenv,
|
compat_setenv,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_parse_qs,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestUtil(unittest.TestCase):
|
class TestUtil(unittest.TestCase):
|
||||||
|
|
||||||
# yt-dlp shim
|
|
||||||
def assertCountEqual(self, expected, got, msg='count should be the same'):
|
|
||||||
return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
|
|
||||||
|
|
||||||
def test_timeconvert(self):
|
def test_timeconvert(self):
|
||||||
self.assertTrue(timeconvert('') is None)
|
self.assertTrue(timeconvert('') is None)
|
||||||
self.assertTrue(timeconvert('bougrg') is None)
|
self.assertTrue(timeconvert('bougrg') is None)
|
||||||
@ -252,6 +248,18 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
|
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
|
||||||
self.assertEqual(sanitize_url('foo bar'), 'foo bar')
|
self.assertEqual(sanitize_url('foo bar'), 'foo bar')
|
||||||
|
|
||||||
|
def test_sanitized_Request(self):
    """Check the `Authorization` header that `sanitized_Request` derives from URL credentials."""
    # no `@` in the URL: no credentials, so no header is expected
    for url in ('http://foo.bar', 'http://:foo.bar'):
        self.assertFalse(sanitized_Request(url).has_header('Authorization'))

    # any `user:pass@` part, even an empty one, should produce a Basic auth header
    for url, authorization in (
        ('http://@foo.bar', 'Basic Og=='),
        ('http://:pass@foo.bar', 'Basic OnBhc3M='),
        ('http://user:@foo.bar', 'Basic dXNlcjo='),
        ('http://user:pass@foo.bar', 'Basic dXNlcjpwYXNz'),
    ):
        self.assertEqual(
            sanitized_Request(url).get_header('Authorization'),
            authorization)
|
||||||
|
|
||||||
def test_expand_path(self):
|
def test_expand_path(self):
|
||||||
def env(var):
|
def env(var):
|
||||||
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
|
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
|
||||||
@ -264,6 +272,27 @@ class TestUtil(unittest.TestCase):
|
|||||||
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
|
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
|
||||||
'%s/expanded' % compat_getenv('HOME'))
|
'%s/expanded' % compat_getenv('HOME'))
|
||||||
|
|
||||||
|
_uncommon_extensions = [
|
||||||
|
('exe', 'abc.exe.ext'),
|
||||||
|
('de', 'abc.de.ext'),
|
||||||
|
('../.mp4', None),
|
||||||
|
('..\\.mp4', None),
|
||||||
|
]
|
||||||
|
|
||||||
|
def assertUnsafeExtension(self, ext=None):
    """Return a context manager asserting that `_UnsafeExtensionError` is raised.

    This wraps `self.assertRaises(_UnsafeExtensionError)`. When `ext` is not
    None, the `extension` attribute of the caught exception must also equal
    `ext`, otherwise the test fails.

    The `with` statement looks up `__exit__` on the type of the context
    manager, not on the instance, so assigning a replacement `__exit__` to
    the instance has no effect. The extra check is therefore installed by
    switching the context object to a subclass that overrides `__exit__`.
    """
    test_case = self
    assert_raises = self.assertRaises(_UnsafeExtensionError)
    assert_raises.ext = ext

    class _CheckedContext(type(assert_raises)):
        def __exit__(self_, exc_type, exc_val, exc_tb):
            did_raise = super(_CheckedContext, self_).__exit__(exc_type, exc_val, exc_tb)
            # only compare when the expected exception was actually caught
            if did_raise and self_.ext is not None:
                test_case.assertEqual(
                    self_.ext, self_.exception.extension,
                    'Unsafe extension not as expected')
            return did_raise

    # same object, same state; only the method resolution changes
    assert_raises.__class__ = _CheckedContext
    return assert_raises
|
||||||
|
|
||||||
def test_prepend_extension(self):
|
def test_prepend_extension(self):
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||||
@ -272,6 +301,19 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
||||||
|
|
||||||
|
# Test uncommon extensions
|
||||||
|
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
|
||||||
|
for ext, result in self._uncommon_extensions:
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
prepend_extension('abc', ext)
|
||||||
|
if result:
|
||||||
|
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
|
||||||
|
else:
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
prepend_extension('abc.ext', ext, 'ext')
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
prepend_extension('abc.unexpected_ext', ext, 'ext')
|
||||||
|
|
||||||
def test_replace_extension(self):
|
def test_replace_extension(self):
|
||||||
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
||||||
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
||||||
@ -280,6 +322,16 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||||
|
|
||||||
|
# Test uncommon extensions
|
||||||
|
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
|
||||||
|
for ext, _ in self._uncommon_extensions:
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
replace_extension('abc', ext)
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
replace_extension('abc.ext', ext, 'ext')
|
||||||
|
with self.assertUnsafeExtension(ext):
|
||||||
|
replace_extension('abc.unexpected_ext', ext, 'ext')
|
||||||
|
|
||||||
def test_subtitles_filename(self):
|
def test_subtitles_filename(self):
|
||||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
||||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
||||||
@ -509,11 +561,14 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(float_or_none(set()), None)
|
self.assertEqual(float_or_none(set()), None)
|
||||||
|
|
||||||
def test_int_or_none(self):
|
def test_int_or_none(self):
|
||||||
|
self.assertEqual(int_or_none(42), 42)
|
||||||
self.assertEqual(int_or_none('42'), 42)
|
self.assertEqual(int_or_none('42'), 42)
|
||||||
self.assertEqual(int_or_none(''), None)
|
self.assertEqual(int_or_none(''), None)
|
||||||
self.assertEqual(int_or_none(None), None)
|
self.assertEqual(int_or_none(None), None)
|
||||||
self.assertEqual(int_or_none([]), None)
|
self.assertEqual(int_or_none([]), None)
|
||||||
self.assertEqual(int_or_none(set()), None)
|
self.assertEqual(int_or_none(set()), None)
|
||||||
|
self.assertEqual(int_or_none('42', base=8), 34)
|
||||||
|
self.assertRaises(TypeError, int_or_none(42, base=8))
|
||||||
|
|
||||||
def test_str_to_int(self):
|
def test_str_to_int(self):
|
||||||
self.assertEqual(str_to_int('123,456'), 123456)
|
self.assertEqual(str_to_int('123,456'), 123456)
|
||||||
@ -680,38 +735,36 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertTrue(isinstance(data, bytes))
|
self.assertTrue(isinstance(data, bytes))
|
||||||
|
|
||||||
def test_update_url_query(self):
|
def test_update_url_query(self):
|
||||||
def query_dict(url):
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
return compat_parse_qs(compat_urlparse.urlparse(url).query)
|
|
||||||
self.assertEqual(query_dict(update_url_query(
|
|
||||||
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
|
'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
|
||||||
query_dict('http://example.com/path?quality=HD&format=mp4'))
|
parse_qs('http://example.com/path?quality=HD&format=mp4'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
|
'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
|
||||||
query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
|
parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
|
'http://example.com/path', {'fields': 'id,formats,subtitles'})),
|
||||||
query_dict('http://example.com/path?fields=id,formats,subtitles'))
|
parse_qs('http://example.com/path?fields=id,formats,subtitles'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
|
'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
|
||||||
query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
|
parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path?manifest=f4m', {'manifest': []})),
|
'http://example.com/path?manifest=f4m', {'manifest': []})),
|
||||||
query_dict('http://example.com/path'))
|
parse_qs('http://example.com/path'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
|
'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
|
||||||
query_dict('http://example.com/path?system=LINUX'))
|
parse_qs('http://example.com/path?system=LINUX'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
|
'http://example.com/path', {'fields': b'id,formats,subtitles'})),
|
||||||
query_dict('http://example.com/path?fields=id,formats,subtitles'))
|
parse_qs('http://example.com/path?fields=id,formats,subtitles'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'width': 1080, 'height': 720})),
|
'http://example.com/path', {'width': 1080, 'height': 720})),
|
||||||
query_dict('http://example.com/path?width=1080&height=720'))
|
parse_qs('http://example.com/path?width=1080&height=720'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'bitrate': 5020.43})),
|
'http://example.com/path', {'bitrate': 5020.43})),
|
||||||
query_dict('http://example.com/path?bitrate=5020.43'))
|
parse_qs('http://example.com/path?bitrate=5020.43'))
|
||||||
self.assertEqual(query_dict(update_url_query(
|
self.assertEqual(parse_qs(update_url_query(
|
||||||
'http://example.com/path', {'test': '第二行тест'})),
|
'http://example.com/path', {'test': '第二行тест'})),
|
||||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||||
|
|
||||||
def test_multipart_encode(self):
|
def test_multipart_encode(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@ -723,28 +776,6 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertRaises(
|
self.assertRaises(
|
||||||
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
||||||
|
|
||||||
def test_dict_get(self):
|
|
||||||
FALSE_VALUES = {
|
|
||||||
'none': None,
|
|
||||||
'false': False,
|
|
||||||
'zero': 0,
|
|
||||||
'empty_string': '',
|
|
||||||
'empty_list': [],
|
|
||||||
}
|
|
||||||
d = FALSE_VALUES.copy()
|
|
||||||
d['a'] = 42
|
|
||||||
self.assertEqual(dict_get(d, 'a'), 42)
|
|
||||||
self.assertEqual(dict_get(d, 'b'), None)
|
|
||||||
self.assertEqual(dict_get(d, 'b', 42), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('a', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
|
||||||
for key, false_value in FALSE_VALUES.items():
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
|
||||||
|
|
||||||
def test_merge_dicts(self):
|
def test_merge_dicts(self):
|
||||||
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
||||||
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
||||||
@ -903,6 +934,111 @@ class TestUtil(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||||
|
|
||||||
|
def test_remove_dot_segments(self):
    """Check that `YoutubeDLHandler._fix_path` resolves `.` and `..` path segments."""

    def remove_dot_segments(path):
        # `_fix_path` is given a full URL here, so graft the path onto a
        # dummy host, adding a leading slash for relative paths
        is_relative = not path.startswith('/')
        url = 'http://example.com' + ('/' if is_relative else '') + path
        fixed = compat_urlparse.urlsplit(YoutubeDLHandler._fix_path(url)).path
        # drop the slash again if it was only added for the dummy URL
        return fixed[1:] if is_relative else fixed

    for path, expected in (
        ('/a/b/c/./../../g', '/a/g'),
        ('mid/content=5/../6', 'mid/6'),
        ('/ad/../cd', '/cd'),
        ('/ad/../cd/', '/cd/'),
        ('/..', '/'),
        ('/./', '/'),
        ('/./a', '/a'),
        ('/abc/./.././d/././e/.././f/./../../ghi', '/ghi'),
        ('/', '/'),
        ('/t', '/t'),
        ('t', 't'),
        ('', ''),
        ('/../a/b/c', '/a/b/c'),
        ('../a', 'a'),
        ('./a', 'a'),
        ('.', ''),
        ('////', '////'),
    ):
        self.assertEqual(remove_dot_segments(path), expected)
|
||||||
|
|
||||||
|
def test_js_to_json_vars_strings(self):
    """Check how `js_to_json` substitutes variables given as JSON source text.

    Each case is (JS source, variable map, expected parsed result). A
    variable whose replacement is a quoted string must stay a string,
    while an unquoted replacement is parsed as the corresponding value.
    """
    cases = (
        # literals vs. their string forms, plus a plain word replacement
        (
            '''{
                'null': a,
                'nullStr': b,
                'true': c,
                'trueStr': d,
                'false': e,
                'falseStr': f,
                'unresolvedVar': g,
            }''',
            {
                'a': 'null',
                'b': '"null"',
                'c': 'true',
                'd': '"true"',
                'e': 'false',
                'f': '"false"',
                'g': 'var',
            },
            {
                'null': None,
                'nullStr': 'null',
                'true': True,
                'trueStr': 'true',
                'false': False,
                'falseStr': 'false',
                'unresolvedVar': 'var'
            },
        ),
        # numbers vs. numeric strings
        (
            '''{
                'int': a,
                'intStr': b,
                'float': c,
                'floatStr': d,
            }''',
            {
                'a': '123',
                'b': '"123"',
                'c': '1.23',
                'd': '"1.23"',
            },
            {
                'int': 123,
                'intStr': '123',
                'float': 1.23,
                'floatStr': '1.23',
            },
        ),
        # containers vs. their string forms
        (
            '''{
                'object': a,
                'objectStr': b,
                'array': c,
                'arrayStr': d,
            }''',
            {
                'a': '{}',
                'b': '"{}"',
                'c': '[]',
                'd': '"[]"',
            },
            {
                'object': {},
                'objectStr': '{}',
                'array': [],
                'arrayStr': '[]',
            },
        ),
    )

    for js_code, variables, expected in cases:
        converted = js_to_json(js_code, variables)
        self.assertDictEqual(json.loads(converted), expected)
|
||||||
|
|
||||||
def test_js_to_json_realworld(self):
|
def test_js_to_json_realworld(self):
|
||||||
inp = '''{
|
inp = '''{
|
||||||
'clip':{'provider':'pseudo'}
|
'clip':{'provider':'pseudo'}
|
||||||
@ -973,10 +1109,10 @@ class TestUtil(unittest.TestCase):
|
|||||||
!42: 42
|
!42: 42
|
||||||
}''')
|
}''')
|
||||||
self.assertEqual(json.loads(on), {
|
self.assertEqual(json.loads(on), {
|
||||||
'a': 0,
|
'a': True,
|
||||||
'b': 1,
|
'b': False,
|
||||||
'c': 0,
|
'c': False,
|
||||||
'd': 42.42,
|
'd': True,
|
||||||
'e': [],
|
'e': [],
|
||||||
'f': "abc",
|
'f': "abc",
|
||||||
'g': "",
|
'g': "",
|
||||||
@ -1046,10 +1182,26 @@ class TestUtil(unittest.TestCase):
|
|||||||
on = js_to_json('{ "040": "040" }')
|
on = js_to_json('{ "040": "040" }')
|
||||||
self.assertEqual(json.loads(on), {'040': '040'})
|
self.assertEqual(json.loads(on), {'040': '040'})
|
||||||
|
|
||||||
|
on = js_to_json('[1,//{},\n2]')
|
||||||
|
self.assertEqual(json.loads(on), [1, 2])
|
||||||
|
|
||||||
|
on = js_to_json(r'"\^\$\#"')
|
||||||
|
self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
|
||||||
|
|
||||||
|
on = js_to_json('\'"\\""\'')
|
||||||
|
self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
|
||||||
|
|
||||||
def test_js_to_json_malformed(self):
|
def test_js_to_json_malformed(self):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
||||||
|
def test_js_to_json_template_literal(self):
    """Check `js_to_json` on backtick template literals with `${...}` placeholders."""
    for template, variables, expected in (
        ('`Hello ${name}`', {'name': '"world"'}, '"Hello world"'),
        ('`${name}${name}`', {'name': '"X"'}, '"XX"'),
        ('`${name}${name}`', {'name': '5'}, '"55"'),
        ('`${name}"${name}"`', {'name': '5'}, '"5\\"5\\""'),
        # a placeholder with no matching variable keeps its name
        ('`${name}`', {}, '"name"'),
    ):
        self.assertEqual(js_to_json(template, variables), expected)
|
||||||
|
|
||||||
def test_extract_attributes(self):
|
def test_extract_attributes(self):
|
||||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||||
@ -1563,238 +1715,7 @@ Line 1
|
|||||||
self.assertEqual(variadic(None), (None, ))
|
self.assertEqual(variadic(None), (None, ))
|
||||||
self.assertEqual(variadic('spam'), ('spam', ))
|
self.assertEqual(variadic('spam'), ('spam', ))
|
||||||
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
|
||||||
|
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
||||||
def test_traverse_obj(self):
|
|
||||||
_TEST_DATA = {
|
|
||||||
100: 100,
|
|
||||||
1.2: 1.2,
|
|
||||||
'str': 'str',
|
|
||||||
'None': None,
|
|
||||||
'...': Ellipsis,
|
|
||||||
'urls': [
|
|
||||||
{'index': 0, 'url': 'https://www.example.com/0'},
|
|
||||||
{'index': 1, 'url': 'https://www.example.com/1'},
|
|
||||||
],
|
|
||||||
'data': (
|
|
||||||
{'index': 2},
|
|
||||||
{'index': 3},
|
|
||||||
),
|
|
||||||
'dict': {},
|
|
||||||
}
|
|
||||||
|
|
||||||
# Test base functionality
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
|
||||||
msg='allow tuple path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
|
||||||
msg='allow list path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
|
||||||
msg='allow iterable path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
|
||||||
msg='single items should be treated as a path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
|
||||||
|
|
||||||
# Test Ellipsis behavior
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
|
|
||||||
(item for item in _TEST_DATA.values() if item is not None),
|
|
||||||
msg='`...` should give all values except `None`')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
|
|
||||||
msg='`...` selection for dicts should select all values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='nested `...` queries should work')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
|
|
||||||
msg='`...` query result should be flattened')
|
|
||||||
|
|
||||||
# Test function as key
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
|
||||||
[_TEST_DATA['urls']],
|
|
||||||
msg='function as query key should perform a filter based on (key, value)')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
|
|
||||||
msg='exceptions in the query function should be caught')
|
|
||||||
|
|
||||||
# Test alternative paths
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
|
||||||
msg='multiple `paths` should be treated as alternative paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
|
||||||
msg='alternatives should exit early')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
|
||||||
msg='alternatives should return `default` if exhausted')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
|
|
||||||
msg='alternatives should track their own branching return')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
|
|
||||||
msg='alternatives on empty objects should search further')
|
|
||||||
|
|
||||||
# Test branch and path nesting
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
|
||||||
msg='tuple as key should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
|
||||||
msg='list as key should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
|
||||||
msg='double nesting in path should be treated as paths')
|
|
||||||
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
|
||||||
msg='do not fail early on branching')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='triple nesting in path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='ellipsis as branch path start gets flattened')
|
|
||||||
|
|
||||||
# Test dictionary as key
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
|
||||||
msg='dict key should result in a dict with the same keys')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
|
||||||
{0: 'https://www.example.com/0'},
|
|
||||||
msg='dict key should allow paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
|
||||||
{0: ['https://www.example.com/0']},
|
|
||||||
msg='tuple in dict path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
|
||||||
{0: ['https://www.example.com/0']},
|
|
||||||
msg='double nesting in dict path should be treated as paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
|
||||||
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
|
||||||
msg='triple nesting in dict path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
|
||||||
msg='remove `None` values when dict key')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
|
|
||||||
msg='do not remove `None` values if `default`')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
|
|
||||||
msg='do not remove empty values when dict key')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
|
|
||||||
msg='do not remove empty values when dict key and a default')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
|
|
||||||
msg='if branch in dict key not successful, return `[]`')
|
|
||||||
|
|
||||||
# Testing default parameter behavior
|
|
||||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
|
||||||
msg='default value should be `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
|
|
||||||
msg='chained fails should result in default')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
|
||||||
msg='should not short cirquit on `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
|
||||||
msg='invalid dict key should result in `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
|
||||||
msg='`None` is a deliberate sentinel and should become `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
|
||||||
msg='`IndexError` should result in `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
|
|
||||||
msg='if branched but not successful return `default` if defined, not `[]`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
|
|
||||||
msg='if branched but not successful return `default` even if `default` is `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
|
|
||||||
msg='if branched but not successful return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
|
|
||||||
msg='if branched but object is empty return `[]`, not `default`')
|
|
||||||
|
|
||||||
# Testing expected_type behavior
|
|
||||||
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
|
|
||||||
msg='accept matching `expected_type` type')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
|
|
||||||
msg='reject non matching `expected_type` type')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
|
|
||||||
msg='transform type using type function')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
|
|
||||||
expected_type=lambda _: 1 / 0), None,
|
|
||||||
msg='wrap expected_type function in try_call')
|
|
||||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
|
|
||||||
msg='eliminate items that expected_type fails on')
|
|
||||||
|
|
||||||
# Test get_all behavior
|
|
||||||
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
|
||||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
|
|
||||||
msg='if not `get_all`, return only first matching value')
|
|
||||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
|
|
||||||
msg='do not overflatten if not `get_all`')
|
|
||||||
|
|
||||||
# Test casesense behavior
|
|
||||||
_CASESENSE_DATA = {
|
|
||||||
'KeY': 'value0',
|
|
||||||
0: {
|
|
||||||
'KeY': 'value1',
|
|
||||||
0: {'KeY': 'value2'},
|
|
||||||
},
|
|
||||||
# FULLWIDTH LATIN CAPITAL LETTER K
|
|
||||||
'\uff2bey': 'value3',
|
|
||||||
}
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
|
|
||||||
msg='dict keys should be case sensitive unless `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
|
|
||||||
casesense=False), 'value0',
|
|
||||||
msg='allow non matching key case if `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
|
|
||||||
casesense=False), 'value3',
|
|
||||||
msg='allow non matching Unicode key case if `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
|
|
||||||
casesense=False), ['value1'],
|
|
||||||
msg='allow non matching key case in branch if `casesense`')
|
|
||||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
|
|
||||||
casesense=False), ['value2'],
|
|
||||||
msg='allow non matching key case in branch path if `casesense`')
|
|
||||||
|
|
||||||
# Test traverse_string behavior
|
|
||||||
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
|
|
||||||
msg='do not traverse into string if not `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
|
|
||||||
_traverse_string=True), 's',
|
|
||||||
msg='traverse into string if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
|
|
||||||
_traverse_string=True), '.',
|
|
||||||
msg='traverse into converted data if `traverse_string`')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
|
|
||||||
_traverse_string=True), list('str'),
|
|
||||||
msg='`...` branching into string should result in list')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
|
||||||
_traverse_string=True), ['s', 'r'],
|
|
||||||
msg='branching into string should result in list')
|
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
|
|
||||||
_traverse_string=True), list('str'),
|
|
||||||
msg='function branching into string should result in list')
|
|
||||||
|
|
||||||
# Test is_user_input behavior
|
|
||||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
|
||||||
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
|
|
||||||
_is_user_input=True), 3,
|
|
||||||
msg='allow for string indexing if `is_user_input`')
|
|
||||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
|
|
||||||
_is_user_input=True), tuple(range(8))[3:],
|
|
||||||
msg='allow for string slice if `is_user_input`')
|
|
||||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
|
|
||||||
_is_user_input=True), tuple(range(8))[:4:2],
|
|
||||||
msg='allow step in string slice if `is_user_input`')
|
|
||||||
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
|
|
||||||
_is_user_input=True), range(8),
|
|
||||||
msg='`:` should be treated as `...` if `is_user_input`')
|
|
||||||
with self.assertRaises(TypeError, msg='too many params should result in error'):
|
|
||||||
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
|
|
||||||
|
|
||||||
# Test re.Match as input obj
|
|
||||||
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
|
|
||||||
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
|
|
||||||
msg='`...` on a `re.Match` should give its `groups()`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
|
|
||||||
msg='function on a `re.Match` should give groupno, value starting at 0')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'group'), '3',
|
|
||||||
msg='str key on a `re.Match` should give group with that name')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 2), '3',
|
|
||||||
msg='int key on a `re.Match` should give group with that name')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
|
|
||||||
msg='str key on a `re.Match` should respect casesense')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'fail'), None,
|
|
||||||
msg='failing str key on a `re.Match` should return `default`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
|
|
||||||
msg='failing str key on a `re.Match` should return `default`')
|
|
||||||
self.assertEqual(traverse_obj(mobj, 8), None,
|
|
||||||
msg='failing int key on a `re.Match` should return `default`')
|
|
||||||
|
|
||||||
def test_get_first(self):
|
|
||||||
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
|
|
||||||
|
|
||||||
def test_join_nonempty(self):
|
def test_join_nonempty(self):
|
||||||
self.assertEqual(join_nonempty('a', 'b'), 'a-b')
|
self.assertEqual(join_nonempty('a', 'b'), 'a-b')
|
||||||
|
@ -11,12 +11,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||||||
from test.helper import get_params, try_rm
|
from test.helper import get_params, try_rm
|
||||||
|
|
||||||
|
|
||||||
import io
|
|
||||||
|
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
import youtube_dl.YoutubeDL
|
import youtube_dl.YoutubeDL
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
|
from youtube_dl.compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(youtube_dl.YoutubeDL):
|
class YoutubeDL(youtube_dl.YoutubeDL):
|
||||||
@ -51,7 +50,7 @@ class TestAnnotations(unittest.TestCase):
|
|||||||
ydl.download([TEST_ID])
|
ydl.download([TEST_ID])
|
||||||
self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
|
self.assertTrue(os.path.exists(ANNOTATIONS_FILE))
|
||||||
annoxml = None
|
annoxml = None
|
||||||
with io.open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
|
with open(ANNOTATIONS_FILE, 'r', encoding='utf-8') as annof:
|
||||||
annoxml = xml.etree.ElementTree.parse(annof)
|
annoxml = xml.etree.ElementTree.parse(annof)
|
||||||
self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
|
self.assertTrue(annoxml is not None, 'Failed to parse annotations XML')
|
||||||
root = annoxml.getroot()
|
root = annoxml.getroot()
|
||||||
|
@ -8,11 +8,14 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import io
|
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
|
|
||||||
from youtube_dl.compat import compat_str, compat_urlretrieve
|
from youtube_dl.compat import (
|
||||||
|
compat_open as open,
|
||||||
|
compat_str,
|
||||||
|
compat_urlretrieve,
|
||||||
|
)
|
||||||
|
|
||||||
from test.helper import FakeYDL
|
from test.helper import FakeYDL
|
||||||
from youtube_dl.extractor import YoutubeIE
|
from youtube_dl.extractor import YoutubeIE
|
||||||
@ -143,6 +146,42 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||||
|
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||||
|
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
|
||||||
|
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
|
||||||
|
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
|
||||||
|
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
|
||||||
|
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||||
|
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -196,7 +235,7 @@ def t_factory(name, sig_func, url_pattern):
|
|||||||
|
|
||||||
if not os.path.exists(fn):
|
if not os.path.exists(fn):
|
||||||
compat_urlretrieve(url, fn)
|
compat_urlretrieve(url, fn)
|
||||||
with io.open(fn, encoding='utf-8') as testf:
|
with open(fn, encoding='utf-8') as testf:
|
||||||
jscode = testf.read()
|
jscode = testf.read()
|
||||||
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
|
self.assertEqual(sig_func(jscode, sig_input), expected_sig)
|
||||||
|
|
||||||
|
35
test/testdata/mpd/range_only.mpd
vendored
Normal file
35
test/testdata/mpd/range_only.mpd
vendored
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<!-- MPD file Generated with GPAC version 1.0.1-revrelease at 2021-11-27T20:53:11.690Z -->
|
||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H0M30.196S" maxSegmentDuration="PT0H0M10.027S" profiles="urn:mpeg:dash:profile:full:2011">
|
||||||
|
<ProgramInformation moreInformationURL="http://gpac.io">
|
||||||
|
<Title>manifest.mpd generated by GPAC</Title>
|
||||||
|
</ProgramInformation>
|
||||||
|
|
||||||
|
<Period duration="PT0H0M30.196S">
|
||||||
|
<AdaptationSet segmentAlignment="true" maxWidth="768" maxHeight="432" maxFrameRate="30000/1001" par="16:9" lang="und" startWithSAP="1">
|
||||||
|
<Representation id="1" mimeType="video/mp4" codecs="avc1.4D401E" width="768" height="432" frameRate="30000/1001" sar="1:1" bandwidth="526987">
|
||||||
|
<BaseURL>video_dashinit.mp4</BaseURL>
|
||||||
|
<SegmentList timescale="90000" duration="900000">
|
||||||
|
<Initialization range="0-881"/>
|
||||||
|
<SegmentURL mediaRange="882-876094" indexRange="882-925"/>
|
||||||
|
<SegmentURL mediaRange="876095-1466732" indexRange="876095-876138"/>
|
||||||
|
<SegmentURL mediaRange="1466733-1953615" indexRange="1466733-1466776"/>
|
||||||
|
<SegmentURL mediaRange="1953616-1994211" indexRange="1953616-1953659"/>
|
||||||
|
</SegmentList>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet segmentAlignment="true" lang="und" startWithSAP="1">
|
||||||
|
<Representation id="2" mimeType="audio/mp4" codecs="mp4a.40.2" audioSamplingRate="48000" bandwidth="98096">
|
||||||
|
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
|
||||||
|
<BaseURL>audio_dashinit.mp4</BaseURL>
|
||||||
|
<SegmentList timescale="48000" duration="480000">
|
||||||
|
<Initialization range="0-752"/>
|
||||||
|
<SegmentURL mediaRange="753-124129" indexRange="753-796"/>
|
||||||
|
<SegmentURL mediaRange="124130-250544" indexRange="124130-124173"/>
|
||||||
|
<SegmentURL mediaRange="250545-374929" indexRange="250545-250588"/>
|
||||||
|
</SegmentList>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>
|
||||||
|
|
351
test/testdata/mpd/subtitles.mpd
vendored
Normal file
351
test/testdata/mpd/subtitles.mpd
vendored
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
|
||||||
|
<MPD
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xmlns="urn:mpeg:dash:schema:mpd:2011"
|
||||||
|
xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
|
||||||
|
type="static"
|
||||||
|
mediaPresentationDuration="PT14M48S"
|
||||||
|
maxSegmentDuration="PT1M"
|
||||||
|
minBufferTime="PT10S"
|
||||||
|
profiles="urn:mpeg:dash:profile:isoff-live:2011">
|
||||||
|
<Period
|
||||||
|
id="1"
|
||||||
|
duration="PT14M48S">
|
||||||
|
<BaseURL>dash/</BaseURL>
|
||||||
|
<AdaptationSet
|
||||||
|
id="1"
|
||||||
|
group="1"
|
||||||
|
contentType="audio"
|
||||||
|
segmentAlignment="true"
|
||||||
|
audioSamplingRate="48000"
|
||||||
|
mimeType="audio/mp4"
|
||||||
|
codecs="mp4a.40.2"
|
||||||
|
startWithSAP="1">
|
||||||
|
<AudioChannelConfiguration
|
||||||
|
schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
|
||||||
|
value="2" />
|
||||||
|
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
|
||||||
|
<SegmentTemplate
|
||||||
|
timescale="48000"
|
||||||
|
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
|
||||||
|
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
|
||||||
|
<SegmentTimeline>
|
||||||
|
<S t="0" d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="96256" r="2" />
|
||||||
|
<S d="95232" />
|
||||||
|
<S d="3584" />
|
||||||
|
</SegmentTimeline>
|
||||||
|
</SegmentTemplate>
|
||||||
|
<Representation
|
||||||
|
id="audio=128001"
|
||||||
|
bandwidth="128001">
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet
|
||||||
|
id="2"
|
||||||
|
group="3"
|
||||||
|
contentType="text"
|
||||||
|
lang="en"
|
||||||
|
mimeType="application/mp4"
|
||||||
|
codecs="stpp"
|
||||||
|
startWithSAP="1">
|
||||||
|
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
|
||||||
|
<SegmentTemplate
|
||||||
|
timescale="1000"
|
||||||
|
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
|
||||||
|
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
|
||||||
|
<SegmentTimeline>
|
||||||
|
<S t="0" d="60000" r="9" />
|
||||||
|
<S d="24000" />
|
||||||
|
</SegmentTimeline>
|
||||||
|
</SegmentTemplate>
|
||||||
|
<Representation
|
||||||
|
id="textstream_eng=1000"
|
||||||
|
bandwidth="1000">
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<AdaptationSet
|
||||||
|
id="3"
|
||||||
|
group="2"
|
||||||
|
contentType="video"
|
||||||
|
par="960:409"
|
||||||
|
minBandwidth="100000"
|
||||||
|
maxBandwidth="4482000"
|
||||||
|
maxWidth="1689"
|
||||||
|
maxHeight="720"
|
||||||
|
segmentAlignment="true"
|
||||||
|
mimeType="video/mp4"
|
||||||
|
codecs="avc1.4D401F"
|
||||||
|
startWithSAP="1">
|
||||||
|
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
|
||||||
|
<SegmentTemplate
|
||||||
|
timescale="12288"
|
||||||
|
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
|
||||||
|
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
|
||||||
|
<SegmentTimeline>
|
||||||
|
<S t="0" d="24576" r="443" />
|
||||||
|
</SegmentTimeline>
|
||||||
|
</SegmentTemplate>
|
||||||
|
<Representation
|
||||||
|
id="video=100000"
|
||||||
|
bandwidth="100000"
|
||||||
|
width="336"
|
||||||
|
height="144"
|
||||||
|
sar="2880:2863"
|
||||||
|
scanType="progressive">
|
||||||
|
</Representation>
|
||||||
|
<Representation
|
||||||
|
id="video=326000"
|
||||||
|
bandwidth="326000"
|
||||||
|
width="562"
|
||||||
|
height="240"
|
||||||
|
sar="115200:114929"
|
||||||
|
scanType="progressive">
|
||||||
|
</Representation>
|
||||||
|
<Representation
|
||||||
|
id="video=698000"
|
||||||
|
bandwidth="698000"
|
||||||
|
width="844"
|
||||||
|
height="360"
|
||||||
|
sar="86400:86299"
|
||||||
|
scanType="progressive">
|
||||||
|
</Representation>
|
||||||
|
<Representation
|
||||||
|
id="video=1493000"
|
||||||
|
bandwidth="1493000"
|
||||||
|
width="1126"
|
||||||
|
height="480"
|
||||||
|
sar="230400:230267"
|
||||||
|
scanType="progressive">
|
||||||
|
</Representation>
|
||||||
|
<Representation
|
||||||
|
id="video=4482000"
|
||||||
|
bandwidth="4482000"
|
||||||
|
width="1688"
|
||||||
|
height="720"
|
||||||
|
sar="86400:86299"
|
||||||
|
scanType="progressive">
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>
|
32
test/testdata/mpd/url_and_range.mpd
vendored
Normal file
32
test/testdata/mpd/url_and_range.mpd
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
<?xml version="1.0" ?>
|
||||||
|
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT10.01S" mediaPresentationDuration="PT30.097S" type="static">
|
||||||
|
<!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-639 -->
|
||||||
|
<Period>
|
||||||
|
<!-- Video -->
|
||||||
|
<AdaptationSet mimeType="video/mp4" segmentAlignment="true" startWithSAP="1" maxWidth="768" maxHeight="432">
|
||||||
|
<Representation id="video-avc1" codecs="avc1.4D401E" width="768" height="432" scanType="progressive" frameRate="30000/1001" bandwidth="699597">
|
||||||
|
<SegmentList timescale="1000" duration="10010">
|
||||||
|
<Initialization sourceURL="video-frag.mp4" range="36-746"/>
|
||||||
|
<SegmentURL media="video-frag.mp4" mediaRange="747-876117"/>
|
||||||
|
<SegmentURL media="video-frag.mp4" mediaRange="876118-1466913"/>
|
||||||
|
<SegmentURL media="video-frag.mp4" mediaRange="1466914-1953954"/>
|
||||||
|
<SegmentURL media="video-frag.mp4" mediaRange="1953955-1994652"/>
|
||||||
|
</SegmentList>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
<!-- Audio -->
|
||||||
|
<AdaptationSet mimeType="audio/mp4" startWithSAP="1" segmentAlignment="true">
|
||||||
|
<Representation id="audio-und-mp4a.40.2" codecs="mp4a.40.2" bandwidth="98808" audioSamplingRate="48000">
|
||||||
|
<AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
|
||||||
|
<SegmentList timescale="1000" duration="10010">
|
||||||
|
<Initialization sourceURL="audio-frag.mp4" range="32-623"/>
|
||||||
|
<SegmentURL media="audio-frag.mp4" mediaRange="624-124199"/>
|
||||||
|
<SegmentURL media="audio-frag.mp4" mediaRange="124200-250303"/>
|
||||||
|
<SegmentURL media="audio-frag.mp4" mediaRange="250304-374365"/>
|
||||||
|
<SegmentURL media="audio-frag.mp4" mediaRange="374366-374836"/>
|
||||||
|
</SegmentList>
|
||||||
|
</Representation>
|
||||||
|
</AdaptationSet>
|
||||||
|
</Period>
|
||||||
|
</MPD>
|
||||||
|
|
@ -4,11 +4,10 @@
|
|||||||
from __future__ import absolute_import, unicode_literals
|
from __future__ import absolute_import, unicode_literals
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import functools
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
@ -26,15 +25,26 @@ import tokenize
|
|||||||
import traceback
|
import traceback
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
try:
|
||||||
|
from ssl import OPENSSL_VERSION
|
||||||
|
except ImportError:
|
||||||
|
# Must be Python 2.6, should be built against 1.0.2
|
||||||
|
OPENSSL_VERSION = 'OpenSSL 1.0.2(?)'
|
||||||
from string import ascii_letters
|
from string import ascii_letters
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_cookiejar,
|
compat_collections_chain_map as ChainMap,
|
||||||
|
compat_filter as filter,
|
||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_http_cookiejar_Cookie,
|
||||||
|
compat_http_cookies_SimpleCookie,
|
||||||
|
compat_integer_types,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_map as map,
|
||||||
compat_numeric_types,
|
compat_numeric_types,
|
||||||
|
compat_open as open,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_tokenize_tokenize,
|
compat_tokenize_tokenize,
|
||||||
@ -44,8 +54,10 @@ from .compat import (
|
|||||||
compat_urllib_request_DataHandler,
|
compat_urllib_request_DataHandler,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
_UnsafeExtensionError,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
args_to_str,
|
||||||
|
bug_reports_message,
|
||||||
ContentTooShortError,
|
ContentTooShortError,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
DateRange,
|
DateRange,
|
||||||
@ -61,10 +73,11 @@ from .utils import (
|
|||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
HEADRequest,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
|
join_nonempty,
|
||||||
locked_file,
|
locked_file,
|
||||||
|
LazyList,
|
||||||
make_HTTPS_handler,
|
make_HTTPS_handler,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
@ -76,7 +89,6 @@ from .utils import (
|
|||||||
preferredencoding,
|
preferredencoding,
|
||||||
prepend_extension,
|
prepend_extension,
|
||||||
process_communicate_or_kill,
|
process_communicate_or_kill,
|
||||||
PUTRequest,
|
|
||||||
register_socks_protocols,
|
register_socks_protocols,
|
||||||
render_table,
|
render_table,
|
||||||
replace_extension,
|
replace_extension,
|
||||||
@ -88,6 +100,7 @@ from .utils import (
|
|||||||
std_headers,
|
std_headers,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
|
traverse_obj,
|
||||||
UnavailableVideoError,
|
UnavailableVideoError,
|
||||||
url_basename,
|
url_basename,
|
||||||
version_tuple,
|
version_tuple,
|
||||||
@ -97,6 +110,7 @@ from .utils import (
|
|||||||
YoutubeDLCookieProcessor,
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
YoutubeDLRedirectHandler,
|
YoutubeDLRedirectHandler,
|
||||||
|
ytdl_is_updateable,
|
||||||
)
|
)
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||||
@ -117,6 +131,20 @@ if compat_os_name == 'nt':
|
|||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
|
||||||
|
def _catch_unsafe_file_extension(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
return func(self, *args, **kwargs)
|
||||||
|
except _UnsafeExtensionError as error:
|
||||||
|
self.report_error(
|
||||||
|
'{0} found; to avoid damaging your system, this value is disallowed.'
|
||||||
|
' If you believe this is an error{1}'.format(
|
||||||
|
error_to_compat_str(error), bug_reports_message(',')))
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL(object):
|
class YoutubeDL(object):
|
||||||
"""YoutubeDL class.
|
"""YoutubeDL class.
|
||||||
|
|
||||||
@ -366,6 +394,9 @@ class YoutubeDL(object):
|
|||||||
self.params.update(params)
|
self.params.update(params)
|
||||||
self.cache = Cache(self)
|
self.cache = Cache(self)
|
||||||
|
|
||||||
|
self._header_cookies = []
|
||||||
|
self._load_cookies_from_headers(self.params.get('http_headers'))
|
||||||
|
|
||||||
def check_deprecated(param, option, suggestion):
|
def check_deprecated(param, option, suggestion):
|
||||||
if self.params.get(param) is not None:
|
if self.params.get(param) is not None:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
@ -572,7 +603,7 @@ class YoutubeDL(object):
|
|||||||
if self.params.get('cookiefile') is not None:
|
if self.params.get('cookiefile') is not None:
|
||||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||||
|
|
||||||
def trouble(self, message=None, tb=None):
|
def trouble(self, *args, **kwargs):
|
||||||
"""Determine action to take when a download problem appears.
|
"""Determine action to take when a download problem appears.
|
||||||
|
|
||||||
Depending on if the downloader has been configured to ignore
|
Depending on if the downloader has been configured to ignore
|
||||||
@ -581,6 +612,11 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
tb, if given, is additional traceback information.
|
tb, if given, is additional traceback information.
|
||||||
"""
|
"""
|
||||||
|
# message=None, tb=None, is_error=True
|
||||||
|
message = args[0] if len(args) > 0 else kwargs.get('message', None)
|
||||||
|
tb = args[1] if len(args) > 1 else kwargs.get('tb', None)
|
||||||
|
is_error = args[2] if len(args) > 2 else kwargs.get('is_error', True)
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
self.to_stderr(message)
|
self.to_stderr(message)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
@ -593,7 +629,10 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
tb_data = traceback.format_list(traceback.extract_stack())
|
tb_data = traceback.format_list(traceback.extract_stack())
|
||||||
tb = ''.join(tb_data)
|
tb = ''.join(tb_data)
|
||||||
|
if tb:
|
||||||
self.to_stderr(tb)
|
self.to_stderr(tb)
|
||||||
|
if not is_error:
|
||||||
|
return
|
||||||
if not self.params.get('ignoreerrors', False):
|
if not self.params.get('ignoreerrors', False):
|
||||||
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
|
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
|
||||||
exc_info = sys.exc_info()[1].exc_info
|
exc_info = sys.exc_info()[1].exc_info
|
||||||
@ -602,11 +641,18 @@ class YoutubeDL(object):
|
|||||||
raise DownloadError(message, exc_info)
|
raise DownloadError(message, exc_info)
|
||||||
self._download_retcode = 1
|
self._download_retcode = 1
|
||||||
|
|
||||||
def report_warning(self, message):
|
def report_warning(self, message, only_once=False, _cache={}):
|
||||||
'''
|
'''
|
||||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
|
if only_once:
|
||||||
|
m_hash = hash((self, message))
|
||||||
|
m_cnt = _cache.setdefault(m_hash, 0)
|
||||||
|
_cache[m_hash] = m_cnt + 1
|
||||||
|
if m_cnt > 0:
|
||||||
|
return
|
||||||
|
|
||||||
if self.params.get('logger') is not None:
|
if self.params.get('logger') is not None:
|
||||||
self.params['logger'].warning(message)
|
self.params['logger'].warning(message)
|
||||||
else:
|
else:
|
||||||
@ -619,7 +665,7 @@ class YoutubeDL(object):
|
|||||||
warning_message = '%s %s' % (_msg_header, message)
|
warning_message = '%s %s' % (_msg_header, message)
|
||||||
self.to_stderr(warning_message)
|
self.to_stderr(warning_message)
|
||||||
|
|
||||||
def report_error(self, message, tb=None):
|
def report_error(self, message, *args, **kwargs):
|
||||||
'''
|
'''
|
||||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||||
in red if stderr is a tty file.
|
in red if stderr is a tty file.
|
||||||
@ -628,8 +674,18 @@ class YoutubeDL(object):
|
|||||||
_msg_header = '\033[0;31mERROR:\033[0m'
|
_msg_header = '\033[0;31mERROR:\033[0m'
|
||||||
else:
|
else:
|
||||||
_msg_header = 'ERROR:'
|
_msg_header = 'ERROR:'
|
||||||
error_message = '%s %s' % (_msg_header, message)
|
kwargs['message'] = '%s %s' % (_msg_header, message)
|
||||||
self.trouble(error_message, tb)
|
self.trouble(*args, **kwargs)
|
||||||
|
|
||||||
|
def report_unscoped_cookies(self, *args, **kwargs):
|
||||||
|
# message=None, tb=False, is_error=False
|
||||||
|
if len(args) <= 2:
|
||||||
|
kwargs.setdefault('is_error', False)
|
||||||
|
if len(args) <= 0:
|
||||||
|
kwargs.setdefault(
|
||||||
|
'message',
|
||||||
|
'Unscoped cookies are not allowed: please specify some sort of scoping')
|
||||||
|
self.report_error(*args, **kwargs)
|
||||||
|
|
||||||
def report_file_already_downloaded(self, file_name):
|
def report_file_already_downloaded(self, file_name):
|
||||||
"""Report file has already been fully downloaded."""
|
"""Report file has already been fully downloaded."""
|
||||||
@ -825,7 +881,7 @@ class YoutubeDL(object):
|
|||||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
self.report_error(msg)
|
self.report_error(msg)
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), tb=e.format_traceback())
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -835,8 +891,83 @@ class YoutubeDL(object):
|
|||||||
raise
|
raise
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
def _remove_cookie_header(self, http_headers):
|
||||||
|
"""Filters out `Cookie` header from an `http_headers` dict
|
||||||
|
The `Cookie` header is removed to prevent leaks as a result of unscoped cookies.
|
||||||
|
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
|
||||||
|
|
||||||
|
@param http_headers An `http_headers` dict from which any `Cookie` header
|
||||||
|
should be removed, or None
|
||||||
|
"""
|
||||||
|
return dict(filter(lambda pair: pair[0].lower() != 'cookie', (http_headers or {}).items()))
|
||||||
|
|
||||||
|
def _load_cookies(self, data, **kwargs):
|
||||||
|
"""Loads cookies from a `Cookie` header
|
||||||
|
|
||||||
|
This tries to work around the security vulnerability of passing cookies to every domain.
|
||||||
|
|
||||||
|
@param data The Cookie header as a string to load the cookies from
|
||||||
|
@param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
|
||||||
|
If `True`, save cookies for later to be stored in the jar with a limited scope
|
||||||
|
If a URL, save cookies in the jar with the domain of the URL
|
||||||
|
"""
|
||||||
|
# autoscope=True (kw-only)
|
||||||
|
autoscope = kwargs.get('autoscope', True)
|
||||||
|
|
||||||
|
for cookie in compat_http_cookies_SimpleCookie(data).values() if data else []:
|
||||||
|
if autoscope and any(cookie.values()):
|
||||||
|
raise ValueError('Invalid syntax in Cookie Header')
|
||||||
|
|
||||||
|
domain = cookie.get('domain') or ''
|
||||||
|
expiry = cookie.get('expires')
|
||||||
|
if expiry == '': # 0 is valid so we check for `''` explicitly
|
||||||
|
expiry = None
|
||||||
|
prepared_cookie = compat_http_cookiejar_Cookie(
|
||||||
|
cookie.get('version') or 0, cookie.key, cookie.value, None, False,
|
||||||
|
domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
|
||||||
|
bool(cookie.get('secure')), expiry, False, None, None, {})
|
||||||
|
|
||||||
|
if domain:
|
||||||
|
self.cookiejar.set_cookie(prepared_cookie)
|
||||||
|
elif autoscope is True:
|
||||||
|
self.report_warning(
|
||||||
|
'Passing cookies as a header is a potential security risk; '
|
||||||
|
'they will be scoped to the domain of the downloaded urls. '
|
||||||
|
'Please consider loading cookies from a file or browser instead.',
|
||||||
|
only_once=True)
|
||||||
|
self._header_cookies.append(prepared_cookie)
|
||||||
|
elif autoscope:
|
||||||
|
self.report_warning(
|
||||||
|
'The extractor result contains an unscoped cookie as an HTTP header. '
|
||||||
|
'If you are specifying an input URL, ' + bug_reports_message(),
|
||||||
|
only_once=True)
|
||||||
|
self._apply_header_cookies(autoscope, [prepared_cookie])
|
||||||
|
else:
|
||||||
|
self.report_unscoped_cookies()
|
||||||
|
|
||||||
|
def _load_cookies_from_headers(self, headers):
|
||||||
|
self._load_cookies(traverse_obj(headers, 'cookie', casesense=False))
|
||||||
|
|
||||||
|
def _apply_header_cookies(self, url, cookies=None):
|
||||||
|
"""This method applies stray header cookies to the provided url
|
||||||
|
|
||||||
|
This loads header cookies and scopes them to the domain provided in `url`.
|
||||||
|
While this is not ideal, it helps reduce the risk of them being sent to
|
||||||
|
an unintended destination.
|
||||||
|
"""
|
||||||
|
parsed = compat_urllib_parse.urlparse(url)
|
||||||
|
if not parsed.hostname:
|
||||||
|
return
|
||||||
|
|
||||||
|
for cookie in map(copy.copy, cookies or self._header_cookies):
|
||||||
|
cookie.domain = '.' + parsed.hostname
|
||||||
|
self.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
@__handle_extraction_exceptions
|
@__handle_extraction_exceptions
|
||||||
def __extract_info(self, url, ie, download, extra_info, process):
|
def __extract_info(self, url, ie, download, extra_info, process):
|
||||||
|
# Compat with passing cookies in http headers
|
||||||
|
self._apply_header_cookies(url)
|
||||||
|
|
||||||
ie_result = ie.extract(url)
|
ie_result = ie.extract(url)
|
||||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||||
return
|
return
|
||||||
@ -862,7 +993,7 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||||
"""
|
"""
|
||||||
Take the result of the ie(may be modified) and resolve all unresolved
|
Take the result of the ie (may be modified) and resolve all unresolved
|
||||||
references (URLs, playlist items).
|
references (URLs, playlist items).
|
||||||
|
|
||||||
It will also download the videos if 'download'.
|
It will also download the videos if 'download'.
|
||||||
@ -924,8 +1055,8 @@ class YoutubeDL(object):
|
|||||||
elif result_type in ('playlist', 'multi_video'):
|
elif result_type in ('playlist', 'multi_video'):
|
||||||
# Protect from infinite recursion due to recursively nested playlists
|
# Protect from infinite recursion due to recursively nested playlists
|
||||||
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||||
webpage_url = ie_result['webpage_url']
|
webpage_url = ie_result.get('webpage_url') # not all pl/mv have this
|
||||||
if webpage_url in self._playlist_urls:
|
if webpage_url and webpage_url in self._playlist_urls:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] Skipping already downloaded playlist: %s'
|
'[download] Skipping already downloaded playlist: %s'
|
||||||
% ie_result.get('title') or ie_result.get('id'))
|
% ie_result.get('title') or ie_result.get('id'))
|
||||||
@ -933,6 +1064,10 @@ class YoutubeDL(object):
|
|||||||
|
|
||||||
self._playlist_level += 1
|
self._playlist_level += 1
|
||||||
self._playlist_urls.add(webpage_url)
|
self._playlist_urls.add(webpage_url)
|
||||||
|
new_result = dict((k, v) for k, v in extra_info.items() if k not in ie_result)
|
||||||
|
if new_result:
|
||||||
|
new_result.update(ie_result)
|
||||||
|
ie_result = new_result
|
||||||
try:
|
try:
|
||||||
return self.__process_playlist(ie_result, download)
|
return self.__process_playlist(ie_result, download)
|
||||||
finally:
|
finally:
|
||||||
@ -1389,17 +1524,16 @@ class YoutubeDL(object):
|
|||||||
'abr': formats_info[1].get('abr'),
|
'abr': formats_info[1].get('abr'),
|
||||||
'ext': output_ext,
|
'ext': output_ext,
|
||||||
}
|
}
|
||||||
video_selector, audio_selector = map(_build_selector_function, selector.selector)
|
|
||||||
|
|
||||||
def selector_function(ctx):
|
def selector_function(ctx):
|
||||||
for pair in itertools.product(
|
selector_fn = lambda x: _build_selector_function(x)(ctx)
|
||||||
video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
|
for pair in itertools.product(*map(selector_fn, selector.selector)):
|
||||||
yield _merge(pair)
|
yield _merge(pair)
|
||||||
|
|
||||||
filters = [self._build_format_filter(f) for f in selector.filters]
|
filters = [self._build_format_filter(f) for f in selector.filters]
|
||||||
|
|
||||||
def final_selector(ctx):
|
def final_selector(ctx):
|
||||||
ctx_copy = copy.deepcopy(ctx)
|
ctx_copy = dict(ctx)
|
||||||
for _filter in filters:
|
for _filter in filters:
|
||||||
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
|
||||||
return selector_function(ctx_copy)
|
return selector_function(ctx_copy)
|
||||||
@ -1434,29 +1568,73 @@ class YoutubeDL(object):
|
|||||||
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
|
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
|
||||||
return _build_selector_function(parsed_selector)
|
return _build_selector_function(parsed_selector)
|
||||||
|
|
||||||
def _calc_headers(self, info_dict):
|
def _calc_headers(self, info_dict, load_cookies=False):
|
||||||
res = std_headers.copy()
|
if load_cookies: # For --load-info-json
|
||||||
|
# load cookies from http_headers in legacy info.json
|
||||||
|
self._load_cookies(traverse_obj(info_dict, ('http_headers', 'Cookie'), casesense=False),
|
||||||
|
autoscope=info_dict['url'])
|
||||||
|
# load scoped cookies from info.json
|
||||||
|
self._load_cookies(info_dict.get('cookies'), autoscope=False)
|
||||||
|
|
||||||
add_headers = info_dict.get('http_headers')
|
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
|
||||||
if add_headers:
|
|
||||||
res.update(add_headers)
|
|
||||||
|
|
||||||
cookies = self._calc_cookies(info_dict)
|
|
||||||
if cookies:
|
if cookies:
|
||||||
res['Cookie'] = cookies
|
# Make a string like name1=val1; attr1=a_val1; ...name2=val2; ...
|
||||||
|
# By convention a cookie name can't be a well-known attribute name
|
||||||
|
# so this syntax is unambiguous and can be parsed by (eg) SimpleCookie
|
||||||
|
encoder = compat_http_cookies_SimpleCookie()
|
||||||
|
values = []
|
||||||
|
attributes = (('Domain', '='), ('Path', '='), ('Secure',), ('Expires', '='), ('Version', '='))
|
||||||
|
attributes = tuple([x[0].lower()] + list(x) for x in attributes)
|
||||||
|
for cookie in cookies:
|
||||||
|
_, value = encoder.value_encode(cookie.value)
|
||||||
|
# Py 2 '' --> '', Py 3 '' --> '""'
|
||||||
|
if value == '':
|
||||||
|
value = '""'
|
||||||
|
values.append('='.join((cookie.name, value)))
|
||||||
|
for attr in attributes:
|
||||||
|
value = getattr(cookie, attr[0], None)
|
||||||
|
if value:
|
||||||
|
values.append('%s%s' % (''.join(attr[1:]), value if len(attr) == 3 else ''))
|
||||||
|
info_dict['cookies'] = '; '.join(values)
|
||||||
|
|
||||||
|
res = std_headers.copy()
|
||||||
|
res.update(info_dict.get('http_headers') or {})
|
||||||
|
res = self._remove_cookie_header(res)
|
||||||
|
|
||||||
if 'X-Forwarded-For' not in res:
|
if 'X-Forwarded-For' not in res:
|
||||||
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
||||||
if x_forwarded_for_ip:
|
if x_forwarded_for_ip:
|
||||||
res['X-Forwarded-For'] = x_forwarded_for_ip
|
res['X-Forwarded-For'] = x_forwarded_for_ip
|
||||||
|
|
||||||
return res
|
return res or None
|
||||||
|
|
||||||
def _calc_cookies(self, info_dict):
|
def _calc_cookies(self, info_dict):
|
||||||
pr = sanitized_Request(info_dict['url'])
|
pr = sanitized_Request(info_dict['url'])
|
||||||
self.cookiejar.add_cookie_header(pr)
|
self.cookiejar.add_cookie_header(pr)
|
||||||
return pr.get_header('Cookie')
|
return pr.get_header('Cookie')
|
||||||
|
|
||||||
|
def _fill_common_fields(self, info_dict, final=True):
|
||||||
|
|
||||||
|
for ts_key, date_key in (
|
||||||
|
('timestamp', 'upload_date'),
|
||||||
|
('release_timestamp', 'release_date'),
|
||||||
|
):
|
||||||
|
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||||
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
|
# see http://bugs.python.org/issue1646728)
|
||||||
|
try:
|
||||||
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||||
|
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
||||||
|
except (ValueError, OverflowError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Auto generate title fields corresponding to the *_number fields when missing
|
||||||
|
# in order to always have clean titles. This is very common for TV series.
|
||||||
|
if final:
|
||||||
|
for field in ('chapter', 'season', 'episode'):
|
||||||
|
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||||
|
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||||
|
|
||||||
def process_video_result(self, info_dict, download=True):
|
def process_video_result(self, info_dict, download=True):
|
||||||
assert info_dict.get('_type', 'video') == 'video'
|
assert info_dict.get('_type', 'video') == 'video'
|
||||||
|
|
||||||
@ -1524,24 +1702,7 @@ class YoutubeDL(object):
|
|||||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||||
info_dict['display_id'] = info_dict['id']
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
for ts_key, date_key in (
|
self._fill_common_fields(info_dict)
|
||||||
('timestamp', 'upload_date'),
|
|
||||||
('release_timestamp', 'release_date'),
|
|
||||||
):
|
|
||||||
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
|
||||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
|
||||||
# see http://bugs.python.org/issue1646728)
|
|
||||||
try:
|
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
|
||||||
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
|
||||||
except (ValueError, OverflowError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Auto generate title fields corresponding to the *_number fields when missing
|
|
||||||
# in order to always have clean titles. This is very common for TV series.
|
|
||||||
for field in ('chapter', 'season', 'episode'):
|
|
||||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
|
||||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
|
||||||
|
|
||||||
for cc_kind in ('subtitles', 'automatic_captions'):
|
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||||
cc = info_dict.get(cc_kind)
|
cc = info_dict.get(cc_kind)
|
||||||
@ -1629,10 +1790,13 @@ class YoutubeDL(object):
|
|||||||
format['protocol'] = determine_protocol(format)
|
format['protocol'] = determine_protocol(format)
|
||||||
# Add HTTP headers, so that external programs can use them from the
|
# Add HTTP headers, so that external programs can use them from the
|
||||||
# json output
|
# json output
|
||||||
full_format_info = info_dict.copy()
|
format['http_headers'] = self._calc_headers(ChainMap(format, info_dict), load_cookies=True)
|
||||||
full_format_info.update(format)
|
|
||||||
format['http_headers'] = self._calc_headers(full_format_info)
|
# Safeguard against old/insecure infojson when using --load-info-json
|
||||||
# Remove private housekeeping stuff
|
info_dict['http_headers'] = self._remove_cookie_header(
|
||||||
|
info_dict.get('http_headers') or {}) or None
|
||||||
|
|
||||||
|
# Remove private housekeeping stuff (copied to http_headers in _calc_headers())
|
||||||
if '__x_forwarded_for_ip' in info_dict:
|
if '__x_forwarded_for_ip' in info_dict:
|
||||||
del info_dict['__x_forwarded_for_ip']
|
del info_dict['__x_forwarded_for_ip']
|
||||||
|
|
||||||
@ -1775,8 +1939,9 @@ class YoutubeDL(object):
|
|||||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||||
print_mandatory('format')
|
print_mandatory('format')
|
||||||
if self.params.get('forcejson', False):
|
if self.params.get('forcejson', False):
|
||||||
self.to_stdout(json.dumps(info_dict))
|
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||||
|
|
||||||
|
@_catch_unsafe_file_extension
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result."""
|
"""Process a single resolved IE result."""
|
||||||
|
|
||||||
@ -1835,7 +2000,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video description to: ' + descfn)
|
self.to_screen('[info] Writing video description to: ' + descfn)
|
||||||
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||||
descfile.write(info_dict['description'])
|
descfile.write(info_dict['description'])
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write description file ' + descfn)
|
self.report_error('Cannot write description file ' + descfn)
|
||||||
@ -1850,7 +2015,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||||
with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||||
annofile.write(info_dict['annotations'])
|
annofile.write(info_dict['annotations'])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
self.report_warning('There are no annotations to write.')
|
self.report_warning('There are no annotations to write.')
|
||||||
@ -1877,7 +2042,7 @@ class YoutubeDL(object):
|
|||||||
try:
|
try:
|
||||||
# Use newline='' to prevent conversion of newline characters
|
# Use newline='' to prevent conversion of newline characters
|
||||||
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
with open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||||
subfile.write(sub_info['data'])
|
subfile.write(sub_info['data'])
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
self.report_error('Cannot write subtitles file ' + sub_filename)
|
self.report_error('Cannot write subtitles file ' + sub_filename)
|
||||||
@ -1886,24 +2051,16 @@ class YoutubeDL(object):
|
|||||||
try:
|
try:
|
||||||
sub_data = ie._request_webpage(
|
sub_data = ie._request_webpage(
|
||||||
sub_info['url'], info_dict['id'], note=False).read()
|
sub_info['url'], info_dict['id'], note=False).read()
|
||||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
with open(encodeFilename(sub_filename), 'wb') as subfile:
|
||||||
subfile.write(sub_data)
|
subfile.write(sub_data)
|
||||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
(sub_lang, error_to_compat_str(err)))
|
(sub_lang, error_to_compat_str(err)))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self.params.get('writeinfojson', False):
|
self._write_info_json(
|
||||||
infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
|
'video description', info_dict,
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
|
replace_extension(filename, 'info.json', info_dict.get('ext')))
|
||||||
self.to_screen('[info] Video description metadata is already present')
|
|
||||||
else:
|
|
||||||
self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
|
|
||||||
try:
|
|
||||||
write_json_file(self.filter_requested_info(info_dict), infofn)
|
|
||||||
except (OSError, IOError):
|
|
||||||
self.report_error('Cannot write metadata to JSON file ' + infofn)
|
|
||||||
return
|
|
||||||
|
|
||||||
self._write_thumbnails(info_dict, filename)
|
self._write_thumbnails(info_dict, filename)
|
||||||
|
|
||||||
@ -1924,7 +2081,11 @@ class YoutubeDL(object):
|
|||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||||
return fd.download(name, info)
|
|
||||||
|
new_info = dict((k, v) for k, v in info.items() if not k.startswith('__p'))
|
||||||
|
new_info['http_headers'] = self._calc_headers(new_info)
|
||||||
|
|
||||||
|
return fd.download(name, new_info)
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
downloaded = []
|
downloaded = []
|
||||||
@ -1953,18 +2114,26 @@ class YoutubeDL(object):
|
|||||||
# TODO: Check acodec/vcodec
|
# TODO: Check acodec/vcodec
|
||||||
return False
|
return False
|
||||||
|
|
||||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
exts = [info_dict['ext']]
|
||||||
filename_wo_ext = (
|
|
||||||
os.path.splitext(filename)[0]
|
|
||||||
if filename_real_ext == info_dict['ext']
|
|
||||||
else filename)
|
|
||||||
requested_formats = info_dict['requested_formats']
|
requested_formats = info_dict['requested_formats']
|
||||||
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
||||||
info_dict['ext'] = 'mkv'
|
info_dict['ext'] = 'mkv'
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Requested formats are incompatible for merge and will be merged into mkv.')
|
'Requested formats are incompatible for merge and will be merged into mkv.')
|
||||||
|
exts.append(info_dict['ext'])
|
||||||
|
|
||||||
# Ensure filename always has a correct extension for successful merge
|
# Ensure filename always has a correct extension for successful merge
|
||||||
filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
|
def correct_ext(filename, ext=exts[1]):
|
||||||
|
if filename == '-':
|
||||||
|
return filename
|
||||||
|
f_name, f_real_ext = os.path.splitext(filename)
|
||||||
|
f_real_ext = f_real_ext[1:]
|
||||||
|
filename_wo_ext = f_name if f_real_ext in exts else filename
|
||||||
|
if ext is None:
|
||||||
|
ext = f_real_ext or None
|
||||||
|
return join_nonempty(filename_wo_ext, ext, delim='.')
|
||||||
|
|
||||||
|
filename = correct_ext(filename)
|
||||||
if os.path.exists(encodeFilename(filename)):
|
if os.path.exists(encodeFilename(filename)):
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'[download] %s has already been downloaded and '
|
'[download] %s has already been downloaded and '
|
||||||
@ -1974,8 +2143,9 @@ class YoutubeDL(object):
|
|||||||
new_info = dict(info_dict)
|
new_info = dict(info_dict)
|
||||||
new_info.update(f)
|
new_info.update(f)
|
||||||
fname = prepend_extension(
|
fname = prepend_extension(
|
||||||
self.prepare_filename(new_info),
|
correct_ext(
|
||||||
'f%s' % f['format_id'], new_info['ext'])
|
self.prepare_filename(new_info), new_info['ext']),
|
||||||
|
'f%s' % (f['format_id'],), new_info['ext'])
|
||||||
if not ensure_dir_exists(fname):
|
if not ensure_dir_exists(fname):
|
||||||
return
|
return
|
||||||
downloaded.append(fname)
|
downloaded.append(fname)
|
||||||
@ -2089,16 +2259,13 @@ class YoutubeDL(object):
|
|||||||
raise
|
raise
|
||||||
else:
|
else:
|
||||||
if self.params.get('dump_single_json', False):
|
if self.params.get('dump_single_json', False):
|
||||||
self.to_stdout(json.dumps(res))
|
self.to_stdout(json.dumps(self.sanitize_info(res)))
|
||||||
|
|
||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
def download_with_info_file(self, info_filename):
|
def download_with_info_file(self, info_filename):
|
||||||
with contextlib.closing(fileinput.FileInput(
|
with open(info_filename, encoding='utf-8') as f:
|
||||||
[info_filename], mode='r',
|
info = self.filter_requested_info(json.load(f))
|
||||||
openhook=fileinput.hook_encoded('utf-8'))) as f:
|
|
||||||
# FileInput doesn't have a read method, we can't call json.load
|
|
||||||
info = self.filter_requested_info(json.loads('\n'.join(f)))
|
|
||||||
try:
|
try:
|
||||||
self.process_ie_result(info, download=True)
|
self.process_ie_result(info, download=True)
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
@ -2111,10 +2278,36 @@ class YoutubeDL(object):
|
|||||||
return self._download_retcode
|
return self._download_retcode
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def filter_requested_info(info_dict):
|
def sanitize_info(info_dict, remove_private_keys=False):
|
||||||
return dict(
|
''' Sanitize the infodict for converting to json '''
|
||||||
(k, v) for k, v in info_dict.items()
|
if info_dict is None:
|
||||||
if k not in ['requested_formats', 'requested_subtitles'])
|
return info_dict
|
||||||
|
|
||||||
|
if remove_private_keys:
|
||||||
|
reject = lambda k, v: (v is None
|
||||||
|
or k.startswith('__')
|
||||||
|
or k in ('requested_formats',
|
||||||
|
'requested_subtitles'))
|
||||||
|
else:
|
||||||
|
reject = lambda k, v: False
|
||||||
|
|
||||||
|
def filter_fn(obj):
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))
|
||||||
|
elif isinstance(obj, (list, tuple, set, LazyList)):
|
||||||
|
return list(map(filter_fn, obj))
|
||||||
|
elif obj is None or any(isinstance(obj, c)
|
||||||
|
for c in (compat_integer_types,
|
||||||
|
(compat_str, float, bool))):
|
||||||
|
return obj
|
||||||
|
else:
|
||||||
|
return repr(obj)
|
||||||
|
|
||||||
|
return filter_fn(info_dict)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def filter_requested_info(cls, info_dict):
|
||||||
|
return cls.sanitize_info(info_dict, True)
|
||||||
|
|
||||||
def post_process(self, filename, ie_info):
|
def post_process(self, filename, ie_info):
|
||||||
"""Run all the postprocessors on the given file."""
|
"""Run all the postprocessors on the given file."""
|
||||||
@ -2300,27 +2493,6 @@ class YoutubeDL(object):
|
|||||||
""" Start an HTTP download """
|
""" Start an HTTP download """
|
||||||
if isinstance(req, compat_basestring):
|
if isinstance(req, compat_basestring):
|
||||||
req = sanitized_Request(req)
|
req = sanitized_Request(req)
|
||||||
# an embedded /../ sequence is not automatically handled by urllib2
|
|
||||||
# see https://github.com/yt-dlp/yt-dlp/issues/3355
|
|
||||||
url = req.get_full_url()
|
|
||||||
parts = url.partition('/../')
|
|
||||||
if parts[1]:
|
|
||||||
url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
|
|
||||||
if url:
|
|
||||||
# worse, URL path may have initial /../ against RFCs: work-around
|
|
||||||
# by stripping such prefixes, like eg Firefox
|
|
||||||
parts = compat_urllib_parse.urlsplit(url)
|
|
||||||
path = parts.path
|
|
||||||
while path.startswith('/../'):
|
|
||||||
path = path[3:]
|
|
||||||
url = parts._replace(path=path).geturl()
|
|
||||||
# get a new Request with the munged URL
|
|
||||||
if url != req.get_full_url():
|
|
||||||
req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
|
|
||||||
req.get_method(), compat_urllib_request.Request)
|
|
||||||
req = req_type(
|
|
||||||
url, data=req.data, headers=dict(req.header_items()),
|
|
||||||
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
|
|
||||||
return self._opener.open(req, timeout=self._socket_timeout)
|
return self._opener.open(req, timeout=self._socket_timeout)
|
||||||
|
|
||||||
def print_debug_header(self):
|
def print_debug_header(self):
|
||||||
@ -2342,9 +2514,12 @@ class YoutubeDL(object):
|
|||||||
self.get_encoding()))
|
self.get_encoding()))
|
||||||
write_string(encoding_str, encoding=None)
|
write_string(encoding_str, encoding=None)
|
||||||
|
|
||||||
self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
|
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
|
||||||
|
writeln_debug('youtube-dl version ', __version__)
|
||||||
if _LAZY_LOADER:
|
if _LAZY_LOADER:
|
||||||
self._write_string('[debug] Lazy loading extractors enabled' + '\n')
|
writeln_debug('Lazy loading extractors enabled')
|
||||||
|
if ytdl_is_updateable():
|
||||||
|
writeln_debug('Single file build')
|
||||||
try:
|
try:
|
||||||
sp = subprocess.Popen(
|
sp = subprocess.Popen(
|
||||||
['git', 'rev-parse', '--short', 'HEAD'],
|
['git', 'rev-parse', '--short', 'HEAD'],
|
||||||
@ -2353,7 +2528,7 @@ class YoutubeDL(object):
|
|||||||
out, err = process_communicate_or_kill(sp)
|
out, err = process_communicate_or_kill(sp)
|
||||||
out = out.decode().strip()
|
out = out.decode().strip()
|
||||||
if re.match('[0-9a-f]+', out):
|
if re.match('[0-9a-f]+', out):
|
||||||
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
writeln_debug('Git HEAD: ', out)
|
||||||
except Exception:
|
except Exception:
|
||||||
try:
|
try:
|
||||||
sys.exc_clear()
|
sys.exc_clear()
|
||||||
@ -2366,9 +2541,22 @@ class YoutubeDL(object):
|
|||||||
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
|
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
|
||||||
return impl_name
|
return impl_name
|
||||||
|
|
||||||
self._write_string('[debug] Python version %s (%s) - %s\n' % (
|
def libc_ver():
|
||||||
platform.python_version(), python_implementation(),
|
try:
|
||||||
platform_name()))
|
return platform.libc_ver()
|
||||||
|
except OSError: # We may not have access to the executable
|
||||||
|
return []
|
||||||
|
|
||||||
|
libc = join_nonempty(*libc_ver(), delim=' ')
|
||||||
|
writeln_debug('Python %s (%s %s %s) - %s - %s%s' % (
|
||||||
|
platform.python_version(),
|
||||||
|
python_implementation(),
|
||||||
|
platform.machine(),
|
||||||
|
platform.architecture()[0],
|
||||||
|
platform_name(),
|
||||||
|
OPENSSL_VERSION,
|
||||||
|
(' - %s' % (libc, )) if libc else ''
|
||||||
|
))
|
||||||
|
|
||||||
exe_versions = FFmpegPostProcessor.get_versions(self)
|
exe_versions = FFmpegPostProcessor.get_versions(self)
|
||||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||||
@ -2380,17 +2568,17 @@ class YoutubeDL(object):
|
|||||||
)
|
)
|
||||||
if not exe_str:
|
if not exe_str:
|
||||||
exe_str = 'none'
|
exe_str = 'none'
|
||||||
self._write_string('[debug] exe versions: %s\n' % exe_str)
|
writeln_debug('exe versions: %s' % (exe_str, ))
|
||||||
|
|
||||||
proxy_map = {}
|
proxy_map = {}
|
||||||
for handler in self._opener.handlers:
|
for handler in self._opener.handlers:
|
||||||
if hasattr(handler, 'proxies'):
|
if hasattr(handler, 'proxies'):
|
||||||
proxy_map.update(handler.proxies)
|
proxy_map.update(handler.proxies)
|
||||||
self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
|
writeln_debug('Proxy map: ', compat_str(proxy_map))
|
||||||
|
|
||||||
if self.params.get('call_home', False):
|
if self.params.get('call_home', False):
|
||||||
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
|
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
|
||||||
self._write_string('[debug] Public IP address: %s\n' % ipaddr)
|
writeln_debug('Public IP address: %s' % (ipaddr, ))
|
||||||
latest_version = self.urlopen(
|
latest_version = self.urlopen(
|
||||||
'https://yt-dl.org/latest/version').read().decode('utf-8')
|
'https://yt-dl.org/latest/version').read().decode('utf-8')
|
||||||
if version_tuple(latest_version) > version_tuple(__version__):
|
if version_tuple(latest_version) > version_tuple(__version__):
|
||||||
@ -2407,7 +2595,7 @@ class YoutubeDL(object):
|
|||||||
opts_proxy = self.params.get('proxy')
|
opts_proxy = self.params.get('proxy')
|
||||||
|
|
||||||
if opts_cookiefile is None:
|
if opts_cookiefile is None:
|
||||||
self.cookiejar = compat_cookiejar.CookieJar()
|
self.cookiejar = YoutubeDLCookieJar()
|
||||||
else:
|
else:
|
||||||
opts_cookiefile = expand_path(opts_cookiefile)
|
opts_cookiefile = expand_path(opts_cookiefile)
|
||||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||||
@ -2468,6 +2656,28 @@ class YoutubeDL(object):
|
|||||||
encoding = preferredencoding()
|
encoding = preferredencoding()
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
|
def _write_info_json(self, label, info_dict, infofn, overwrite=None):
|
||||||
|
if not self.params.get('writeinfojson', False):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def msg(fmt, lbl):
|
||||||
|
return fmt % (lbl + ' metadata',)
|
||||||
|
|
||||||
|
if overwrite is None:
|
||||||
|
overwrite = not self.params.get('nooverwrites', False)
|
||||||
|
|
||||||
|
if not overwrite and os.path.exists(encodeFilename(infofn)):
|
||||||
|
self.to_screen(msg('[info] %s is already present', label.title()))
|
||||||
|
return 'exists'
|
||||||
|
else:
|
||||||
|
self.to_screen(msg('[info] Writing %s as JSON to: ', label) + infofn)
|
||||||
|
try:
|
||||||
|
write_json_file(self.filter_requested_info(info_dict), infofn)
|
||||||
|
return True
|
||||||
|
except (OSError, IOError):
|
||||||
|
self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
|
||||||
|
return
|
||||||
|
|
||||||
def _write_thumbnails(self, info_dict, filename):
|
def _write_thumbnails(self, info_dict, filename):
|
||||||
if self.params.get('writethumbnail', False):
|
if self.params.get('writethumbnail', False):
|
||||||
thumbnails = info_dict.get('thumbnails')
|
thumbnails = info_dict.get('thumbnails')
|
||||||
|
@ -5,7 +5,6 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
__license__ = 'Public Domain'
|
__license__ = 'Public Domain'
|
||||||
|
|
||||||
import codecs
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
@ -17,10 +16,12 @@ from .options import (
|
|||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
|
compat_register_utf8,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
workaround_optparse_bug9161,
|
workaround_optparse_bug9161,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
|
_UnsafeExtensionError,
|
||||||
DateRange,
|
DateRange,
|
||||||
decodeOption,
|
decodeOption,
|
||||||
DEFAULT_OUTTMPL,
|
DEFAULT_OUTTMPL,
|
||||||
@ -46,10 +47,8 @@ from .YoutubeDL import YoutubeDL
|
|||||||
|
|
||||||
|
|
||||||
def _real_main(argv=None):
|
def _real_main(argv=None):
|
||||||
# Compatibility fixes for Windows
|
# Compatibility fix for Windows
|
||||||
if sys.platform == 'win32':
|
compat_register_utf8()
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/820
|
|
||||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
|
||||||
|
|
||||||
workaround_optparse_bug9161()
|
workaround_optparse_bug9161()
|
||||||
|
|
||||||
@ -175,6 +174,9 @@ def _real_main(argv=None):
|
|||||||
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||||
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||||
|
|
||||||
|
if opts.no_check_extensions:
|
||||||
|
_UnsafeExtensionError.lenient = True
|
||||||
|
|
||||||
def parse_retries(retries):
|
def parse_retries(retries):
|
||||||
if retries in ('inf', 'infinite'):
|
if retries in ('inf', 'infinite'):
|
||||||
parsed_retries = float('inf')
|
parsed_retries = float('inf')
|
||||||
|
@ -1,14 +1,16 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
import errno
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .compat import compat_getenv
|
from .compat import (
|
||||||
|
compat_getenv,
|
||||||
|
compat_open as open,
|
||||||
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
expand_path,
|
expand_path,
|
||||||
@ -83,7 +85,7 @@ class Cache(object):
|
|||||||
cache_fn = self._get_cache_fn(section, key, dtype)
|
cache_fn = self._get_cache_fn(section, key, dtype)
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
with open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||||
return self._validate(json.load(cachef), min_ver)
|
return self._validate(json.load(cachef), min_ver)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
try:
|
try:
|
||||||
|
@ -1663,5 +1663,5 @@ def casefold(s):
|
|||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
casefold
|
'casefold',
|
||||||
]
|
]
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import binascii
|
import binascii
|
||||||
import collections
|
import collections
|
||||||
import ctypes
|
import ctypes
|
||||||
|
import datetime
|
||||||
import email
|
import email
|
||||||
import getpass
|
import getpass
|
||||||
import io
|
import io
|
||||||
@ -19,6 +21,7 @@ import socket
|
|||||||
import struct
|
import struct
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import types
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
# naming convention
|
# naming convention
|
||||||
@ -31,13 +34,17 @@ try:
|
|||||||
compat_str, compat_basestring, compat_chr = (
|
compat_str, compat_basestring, compat_chr = (
|
||||||
unicode, basestring, unichr
|
unicode, basestring, unichr
|
||||||
)
|
)
|
||||||
from .casefold import casefold as compat_casefold
|
|
||||||
|
|
||||||
except NameError:
|
except NameError:
|
||||||
compat_str, compat_basestring, compat_chr = (
|
compat_str, compat_basestring, compat_chr = (
|
||||||
str, str, chr
|
str, (str, bytes), chr
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# casefold
|
||||||
|
try:
|
||||||
|
compat_str.casefold
|
||||||
compat_casefold = lambda s: s.casefold()
|
compat_casefold = lambda s: s.casefold()
|
||||||
|
except AttributeError:
|
||||||
|
from .casefold import casefold as compat_casefold
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import collections.abc as compat_collections_abc
|
import collections.abc as compat_collections_abc
|
||||||
@ -49,6 +56,29 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib2 as compat_urllib_request
|
import urllib2 as compat_urllib_request
|
||||||
|
|
||||||
|
# Also fix up lack of method arg in old Pythons
|
||||||
|
try:
|
||||||
|
type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
|
||||||
|
except TypeError:
|
||||||
|
def _add_init_method_arg(cls):
|
||||||
|
|
||||||
|
init = cls.__init__
|
||||||
|
|
||||||
|
def wrapped_init(self, *args, **kwargs):
|
||||||
|
method = kwargs.pop('method', 'GET')
|
||||||
|
init(self, *args, **kwargs)
|
||||||
|
if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
|
||||||
|
# allow instance or its subclass to override get_method()
|
||||||
|
return
|
||||||
|
if self.has_data() and method == 'GET':
|
||||||
|
method = 'POST'
|
||||||
|
self.get_method = types.MethodType(lambda _: method, self)
|
||||||
|
|
||||||
|
cls.__init__ = wrapped_init
|
||||||
|
|
||||||
|
_add_init_method_arg(compat_urllib_request.Request)
|
||||||
|
del _add_init_method_arg
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.error as compat_urllib_error
|
import urllib.error as compat_urllib_error
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
@ -73,6 +103,12 @@ try:
|
|||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib as compat_urllib_response
|
import urllib as compat_urllib_response
|
||||||
|
|
||||||
|
try:
|
||||||
|
compat_urllib_response.addinfourl.status
|
||||||
|
except AttributeError:
|
||||||
|
# .getcode() is deprecated in Py 3.
|
||||||
|
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookiejar as compat_cookiejar
|
import http.cookiejar as compat_cookiejar
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
@ -97,12 +133,24 @@ except ImportError: # Python 2
|
|||||||
import Cookie as compat_cookies
|
import Cookie as compat_cookies
|
||||||
compat_http_cookies = compat_cookies
|
compat_http_cookies = compat_cookies
|
||||||
|
|
||||||
if sys.version_info[0] == 2:
|
if sys.version_info[0] == 2 or sys.version_info < (3, 3):
|
||||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||||
def load(self, rawdata):
|
def load(self, rawdata):
|
||||||
|
must_have_value = 0
|
||||||
|
if not isinstance(rawdata, dict):
|
||||||
|
if sys.version_info[:2] != (2, 7) or sys.platform.startswith('java'):
|
||||||
|
# attribute must have value for parsing
|
||||||
|
rawdata, must_have_value = re.subn(
|
||||||
|
r'(?i)(;\s*)(secure|httponly)(\s*(?:;|$))', r'\1\2=\2\3', rawdata)
|
||||||
|
if sys.version_info[0] == 2:
|
||||||
if isinstance(rawdata, compat_str):
|
if isinstance(rawdata, compat_str):
|
||||||
rawdata = str(rawdata)
|
rawdata = str(rawdata)
|
||||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||||
|
if must_have_value > 0:
|
||||||
|
for morsel in self.values():
|
||||||
|
for attr in ('secure', 'httponly'):
|
||||||
|
if morsel.get(attr):
|
||||||
|
morsel[attr] = True
|
||||||
else:
|
else:
|
||||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||||
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
||||||
@ -2354,6 +2402,11 @@ try:
|
|||||||
import http.client as compat_http_client
|
import http.client as compat_http_client
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import httplib as compat_http_client
|
import httplib as compat_http_client
|
||||||
|
try:
|
||||||
|
compat_http_client.HTTPResponse.getcode
|
||||||
|
except AttributeError:
|
||||||
|
# Py < 3.1
|
||||||
|
compat_http_client.HTTPResponse.getcode = lambda self: self.status
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.error import HTTPError as compat_HTTPError
|
from urllib.error import HTTPError as compat_HTTPError
|
||||||
@ -2368,29 +2421,26 @@ except ImportError: # Python 2
|
|||||||
compat_urllib_request_urlretrieve = compat_urlretrieve
|
compat_urllib_request_urlretrieve = compat_urlretrieve
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
from HTMLParser import (
|
||||||
|
HTMLParser as compat_HTMLParser,
|
||||||
|
HTMLParseError as compat_HTMLParseError)
|
||||||
|
except ImportError: # Python 3
|
||||||
from html.parser import HTMLParser as compat_HTMLParser
|
from html.parser import HTMLParser as compat_HTMLParser
|
||||||
except ImportError: # Python 2
|
|
||||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
|
||||||
compat_html_parser_HTMLParser = compat_HTMLParser
|
|
||||||
|
|
||||||
try: # Python 2
|
|
||||||
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
|
||||||
except ImportError: # Python <3.4
|
|
||||||
try:
|
try:
|
||||||
from html.parser import HTMLParseError as compat_HTMLParseError
|
from html.parser import HTMLParseError as compat_HTMLParseError
|
||||||
except ImportError: # Python >3.4
|
except ImportError: # Python >3.4
|
||||||
|
# HTMLParseError was deprecated in Python 3.3 and removed in
|
||||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
|
||||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
# and uniform cross-version exception handling
|
# and uniform cross-version exception handling
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
compat_html_parser_HTMLParser = compat_HTMLParser
|
||||||
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
_DEVNULL = subprocess.DEVNULL
|
||||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
compat_subprocess_get_DEVNULL = lambda: _DEVNULL
|
||||||
except ImportError:
|
except AttributeError:
|
||||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -2669,8 +2719,222 @@ if sys.version_info < (2, 7):
|
|||||||
if isinstance(xpath, compat_str):
|
if isinstance(xpath, compat_str):
|
||||||
xpath = xpath.encode('ascii')
|
xpath = xpath.encode('ascii')
|
||||||
return xpath
|
return xpath
|
||||||
|
|
||||||
|
# further code below based on CPython 2.7 source
|
||||||
|
import functools
|
||||||
|
|
||||||
|
_xpath_tokenizer_re = re.compile(r'''(?x)
|
||||||
|
( # (1)
|
||||||
|
'[^']*'|"[^"]*"| # quoted strings, or
|
||||||
|
::|//?|\.\.|\(\)|[/.*:[\]()@=] # navigation specials
|
||||||
|
)| # or (2)
|
||||||
|
((?:\{[^}]+\})?[^/[\]()@=\s]+)| # token: optional {ns}, no specials
|
||||||
|
\s+ # or white space
|
||||||
|
''')
|
||||||
|
|
||||||
|
def _xpath_tokenizer(pattern, namespaces=None):
|
||||||
|
for token in _xpath_tokenizer_re.findall(pattern):
|
||||||
|
tag = token[1]
|
||||||
|
if tag and tag[0] != "{" and ":" in tag:
|
||||||
|
try:
|
||||||
|
if not namespaces:
|
||||||
|
raise KeyError
|
||||||
|
prefix, uri = tag.split(":", 1)
|
||||||
|
yield token[0], "{%s}%s" % (namespaces[prefix], uri)
|
||||||
|
except KeyError:
|
||||||
|
raise SyntaxError("prefix %r not found in prefix map" % prefix)
|
||||||
|
else:
|
||||||
|
yield token
|
||||||
|
|
||||||
|
def _get_parent_map(context):
|
||||||
|
parent_map = context.parent_map
|
||||||
|
if parent_map is None:
|
||||||
|
context.parent_map = parent_map = {}
|
||||||
|
for p in context.root.getiterator():
|
||||||
|
for e in p:
|
||||||
|
parent_map[e] = p
|
||||||
|
return parent_map
|
||||||
|
|
||||||
|
def _select(context, result, filter_fn=lambda *_: True):
|
||||||
|
for elem in result:
|
||||||
|
for e in elem:
|
||||||
|
if filter_fn(e, elem):
|
||||||
|
yield e
|
||||||
|
|
||||||
|
def _prepare_child(next_, token):
|
||||||
|
tag = token[1]
|
||||||
|
return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag)
|
||||||
|
|
||||||
|
def _prepare_star(next_, token):
|
||||||
|
return _select
|
||||||
|
|
||||||
|
def _prepare_self(next_, token):
|
||||||
|
return lambda _, result: (e for e in result)
|
||||||
|
|
||||||
|
def _prepare_descendant(next_, token):
|
||||||
|
token = next(next_)
|
||||||
|
if token[0] == "*":
|
||||||
|
tag = "*"
|
||||||
|
elif not token[0]:
|
||||||
|
tag = token[1]
|
||||||
|
else:
|
||||||
|
raise SyntaxError("invalid descendant")
|
||||||
|
|
||||||
|
def select(context, result):
|
||||||
|
for elem in result:
|
||||||
|
for e in elem.getiterator(tag):
|
||||||
|
if e is not elem:
|
||||||
|
yield e
|
||||||
|
return select
|
||||||
|
|
||||||
|
def _prepare_parent(next_, token):
|
||||||
|
def select(context, result):
|
||||||
|
# FIXME: raise error if .. is applied at toplevel?
|
||||||
|
parent_map = _get_parent_map(context)
|
||||||
|
result_map = {}
|
||||||
|
for elem in result:
|
||||||
|
if elem in parent_map:
|
||||||
|
parent = parent_map[elem]
|
||||||
|
if parent not in result_map:
|
||||||
|
result_map[parent] = None
|
||||||
|
yield parent
|
||||||
|
return select
|
||||||
|
|
||||||
|
def _prepare_predicate(next_, token):
|
||||||
|
signature = []
|
||||||
|
predicate = []
|
||||||
|
for token in next_:
|
||||||
|
if token[0] == "]":
|
||||||
|
break
|
||||||
|
if token[0] and token[0][:1] in "'\"":
|
||||||
|
token = "'", token[0][1:-1]
|
||||||
|
signature.append(token[0] or "-")
|
||||||
|
predicate.append(token[1])
|
||||||
|
|
||||||
|
def select(context, result, filter_fn=lambda _: True):
|
||||||
|
for elem in result:
|
||||||
|
if filter_fn(elem):
|
||||||
|
yield elem
|
||||||
|
|
||||||
|
signature = "".join(signature)
|
||||||
|
# use signature to determine predicate type
|
||||||
|
if signature == "@-":
|
||||||
|
# [@attribute] predicate
|
||||||
|
key = predicate[1]
|
||||||
|
return functools.partial(
|
||||||
|
select, filter_fn=lambda el: el.get(key) is not None)
|
||||||
|
if signature == "@-='":
|
||||||
|
# [@attribute='value']
|
||||||
|
key = predicate[1]
|
||||||
|
value = predicate[-1]
|
||||||
|
return functools.partial(
|
||||||
|
select, filter_fn=lambda el: el.get(key) == value)
|
||||||
|
if signature == "-" and not re.match(r"\d+$", predicate[0]):
|
||||||
|
# [tag]
|
||||||
|
tag = predicate[0]
|
||||||
|
return functools.partial(
|
||||||
|
select, filter_fn=lambda el: el.find(tag) is not None)
|
||||||
|
if signature == "-='" and not re.match(r"\d+$", predicate[0]):
|
||||||
|
# [tag='value']
|
||||||
|
tag = predicate[0]
|
||||||
|
value = predicate[-1]
|
||||||
|
|
||||||
|
def itertext(el):
|
||||||
|
for e in el.getiterator():
|
||||||
|
e = e.text
|
||||||
|
if e:
|
||||||
|
yield e
|
||||||
|
|
||||||
|
def select(context, result):
|
||||||
|
for elem in result:
|
||||||
|
for e in elem.findall(tag):
|
||||||
|
if "".join(itertext(e)) == value:
|
||||||
|
yield elem
|
||||||
|
break
|
||||||
|
return select
|
||||||
|
if signature == "-" or signature == "-()" or signature == "-()-":
|
||||||
|
# [index] or [last()] or [last()-index]
|
||||||
|
if signature == "-":
|
||||||
|
index = int(predicate[0]) - 1
|
||||||
|
else:
|
||||||
|
if predicate[0] != "last":
|
||||||
|
raise SyntaxError("unsupported function")
|
||||||
|
if signature == "-()-":
|
||||||
|
try:
|
||||||
|
index = int(predicate[2]) - 1
|
||||||
|
except ValueError:
|
||||||
|
raise SyntaxError("unsupported expression")
|
||||||
|
else:
|
||||||
|
index = -1
|
||||||
|
|
||||||
|
def select(context, result):
|
||||||
|
parent_map = _get_parent_map(context)
|
||||||
|
for elem in result:
|
||||||
|
try:
|
||||||
|
parent = parent_map[elem]
|
||||||
|
# FIXME: what if the selector is "*" ?
|
||||||
|
elems = list(parent.findall(elem.tag))
|
||||||
|
if elems[index] is elem:
|
||||||
|
yield elem
|
||||||
|
except (IndexError, KeyError):
|
||||||
|
pass
|
||||||
|
return select
|
||||||
|
raise SyntaxError("invalid predicate")
|
||||||
|
|
||||||
|
ops = {
|
||||||
|
"": _prepare_child,
|
||||||
|
"*": _prepare_star,
|
||||||
|
".": _prepare_self,
|
||||||
|
"..": _prepare_parent,
|
||||||
|
"//": _prepare_descendant,
|
||||||
|
"[": _prepare_predicate,
|
||||||
|
}
|
||||||
|
|
||||||
|
_cache = {}
|
||||||
|
|
||||||
|
class _SelectorContext:
|
||||||
|
parent_map = None
|
||||||
|
|
||||||
|
def __init__(self, root):
|
||||||
|
self.root = root
|
||||||
|
|
||||||
|
##
|
||||||
|
# Generate all matching objects.
|
||||||
|
|
||||||
|
def compat_etree_iterfind(elem, path, namespaces=None):
|
||||||
|
# compile selector pattern
|
||||||
|
if path[-1:] == "/":
|
||||||
|
path = path + "*" # implicit all (FIXME: keep this?)
|
||||||
|
try:
|
||||||
|
selector = _cache[path]
|
||||||
|
except KeyError:
|
||||||
|
if len(_cache) > 100:
|
||||||
|
_cache.clear()
|
||||||
|
if path[:1] == "/":
|
||||||
|
raise SyntaxError("cannot use absolute path on element")
|
||||||
|
tokens = _xpath_tokenizer(path, namespaces)
|
||||||
|
selector = []
|
||||||
|
for token in tokens:
|
||||||
|
if token[0] == "/":
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
selector.append(ops[token[0]](tokens, token))
|
||||||
|
except StopIteration:
|
||||||
|
raise SyntaxError("invalid path")
|
||||||
|
_cache[path] = selector
|
||||||
|
# execute selector pattern
|
||||||
|
result = [elem]
|
||||||
|
context = _SelectorContext(elem)
|
||||||
|
for select in selector:
|
||||||
|
result = select(context, result)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# end of code based on CPython 2.7 source
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
compat_xpath = lambda xpath: xpath
|
compat_xpath = lambda xpath: xpath
|
||||||
|
compat_etree_iterfind = lambda element, match: element.iterfind(match)
|
||||||
|
|
||||||
|
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
@ -2706,7 +2970,7 @@ except (AssertionError, UnicodeEncodeError):
|
|||||||
|
|
||||||
|
|
||||||
def compat_ord(c):
|
def compat_ord(c):
|
||||||
if type(c) is int:
|
if isinstance(c, int):
|
||||||
return c
|
return c
|
||||||
else:
|
else:
|
||||||
return ord(c)
|
return ord(c)
|
||||||
@ -2890,6 +3154,51 @@ else:
|
|||||||
compat_socket_create_connection = socket.create_connection
|
compat_socket_create_connection = socket.create_connection
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
from contextlib import suppress as compat_contextlib_suppress
|
||||||
|
except ImportError:
|
||||||
|
class compat_contextlib_suppress(object):
|
||||||
|
_exceptions = None
|
||||||
|
|
||||||
|
def __init__(self, *exceptions):
|
||||||
|
super(compat_contextlib_suppress, self).__init__()
|
||||||
|
# TODO: [Base]ExceptionGroup (3.12+)
|
||||||
|
self._exceptions = exceptions
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
return exc_type is not None and issubclass(exc_type, self._exceptions or tuple())
|
||||||
|
|
||||||
|
|
||||||
|
# subprocess.Popen context manager
|
||||||
|
# avoids leaking handles if .communicate() is not called
|
||||||
|
try:
|
||||||
|
_Popen = subprocess.Popen
|
||||||
|
# check for required context manager attributes
|
||||||
|
_Popen.__enter__ and _Popen.__exit__
|
||||||
|
compat_subprocess_Popen = _Popen
|
||||||
|
except AttributeError:
|
||||||
|
# not a context manager - make one
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def compat_subprocess_Popen(*args, **kwargs):
|
||||||
|
popen = None
|
||||||
|
try:
|
||||||
|
popen = _Popen(*args, **kwargs)
|
||||||
|
yield popen
|
||||||
|
finally:
|
||||||
|
if popen:
|
||||||
|
for f in (popen.stdin, popen.stdout, popen.stderr):
|
||||||
|
if f:
|
||||||
|
# repeated .close() is OK, but just in case
|
||||||
|
with compat_contextlib_suppress(EnvironmentError):
|
||||||
|
f.close()
|
||||||
|
popen.wait()
|
||||||
|
|
||||||
|
|
||||||
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
|
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
|
||||||
# See http://bugs.python.org/issue9161 for what is broken
|
# See http://bugs.python.org/issue9161 for what is broken
|
||||||
def workaround_optparse_bug9161():
|
def workaround_optparse_bug9161():
|
||||||
@ -3127,6 +3436,46 @@ else:
|
|||||||
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
if sys.version_info < (3, 0):
|
||||||
|
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
|
||||||
|
def compat_open(file_, *args, **kwargs):
|
||||||
|
if len(args) > 6 or 'opener' in kwargs:
|
||||||
|
raise ValueError('open: unsupported argument "opener"')
|
||||||
|
return io.open(file_, *args, **kwargs)
|
||||||
|
else:
|
||||||
|
compat_open = open
|
||||||
|
|
||||||
|
|
||||||
|
# compat_register_utf8
|
||||||
|
def compat_register_utf8():
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/820
|
||||||
|
from codecs import register, lookup
|
||||||
|
register(
|
||||||
|
lambda name: lookup('utf-8') if name == 'cp65001' else None)
|
||||||
|
|
||||||
|
|
||||||
|
# compat_datetime_timedelta_total_seconds
|
||||||
|
try:
|
||||||
|
compat_datetime_timedelta_total_seconds = datetime.timedelta.total_seconds
|
||||||
|
except AttributeError:
|
||||||
|
# Py 2.6
|
||||||
|
def compat_datetime_timedelta_total_seconds(td):
|
||||||
|
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
|
||||||
|
|
||||||
|
# optional decompression packages
|
||||||
|
# PyPi brotli package implements 'br' Content-Encoding
|
||||||
|
try:
|
||||||
|
import brotli as compat_brotli
|
||||||
|
except ImportError:
|
||||||
|
compat_brotli = None
|
||||||
|
# PyPi ncompress package implements 'compress' Content-Encoding
|
||||||
|
try:
|
||||||
|
import ncompress as compat_ncompress
|
||||||
|
except ImportError:
|
||||||
|
compat_ncompress = None
|
||||||
|
|
||||||
|
|
||||||
legacy = [
|
legacy = [
|
||||||
'compat_HTMLParseError',
|
'compat_HTMLParseError',
|
||||||
'compat_HTMLParser',
|
'compat_HTMLParser',
|
||||||
@ -3160,16 +3509,20 @@ __all__ = [
|
|||||||
'compat_Struct',
|
'compat_Struct',
|
||||||
'compat_base64_b64decode',
|
'compat_base64_b64decode',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
|
'compat_brotli',
|
||||||
'compat_casefold',
|
'compat_casefold',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_collections_abc',
|
'compat_collections_abc',
|
||||||
'compat_collections_chain_map',
|
'compat_collections_chain_map',
|
||||||
|
'compat_datetime_timedelta_total_seconds',
|
||||||
'compat_http_cookiejar',
|
'compat_http_cookiejar',
|
||||||
'compat_http_cookiejar_Cookie',
|
'compat_http_cookiejar_Cookie',
|
||||||
'compat_http_cookies',
|
'compat_http_cookies',
|
||||||
'compat_http_cookies_SimpleCookie',
|
'compat_http_cookies_SimpleCookie',
|
||||||
|
'compat_contextlib_suppress',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
|
'compat_etree_iterfind',
|
||||||
'compat_filter',
|
'compat_filter',
|
||||||
'compat_get_terminal_size',
|
'compat_get_terminal_size',
|
||||||
'compat_getenv',
|
'compat_getenv',
|
||||||
@ -3184,7 +3537,9 @@ __all__ = [
|
|||||||
'compat_itertools_zip_longest',
|
'compat_itertools_zip_longest',
|
||||||
'compat_kwargs',
|
'compat_kwargs',
|
||||||
'compat_map',
|
'compat_map',
|
||||||
|
'compat_ncompress',
|
||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
|
'compat_open',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_os_path_expanduser',
|
'compat_os_path_expanduser',
|
||||||
@ -3192,6 +3547,7 @@ __all__ = [
|
|||||||
'compat_print',
|
'compat_print',
|
||||||
'compat_re_Match',
|
'compat_re_Match',
|
||||||
'compat_re_Pattern',
|
'compat_re_Pattern',
|
||||||
|
'compat_register_utf8',
|
||||||
'compat_setenv',
|
'compat_setenv',
|
||||||
'compat_shlex_quote',
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shlex_split',
|
||||||
@ -3200,6 +3556,7 @@ __all__ = [
|
|||||||
'compat_struct_pack',
|
'compat_struct_pack',
|
||||||
'compat_struct_unpack',
|
'compat_struct_unpack',
|
||||||
'compat_subprocess_get_DEVNULL',
|
'compat_subprocess_get_DEVNULL',
|
||||||
|
'compat_subprocess_Popen',
|
||||||
'compat_tokenize_tokenize',
|
'compat_tokenize_tokenize',
|
||||||
'compat_urllib_error',
|
'compat_urllib_error',
|
||||||
'compat_urllib_parse',
|
'compat_urllib_parse',
|
||||||
|
@ -96,7 +96,7 @@ class FileDownloader(object):
|
|||||||
return None
|
return None
|
||||||
return int(float(remaining) / rate)
|
return int(float(remaining) / rate)
|
||||||
start, now = (start_or_rate, now_or_remaining)
|
start, now = (start_or_rate, now_or_remaining)
|
||||||
total, current = args
|
total, current = args[:2]
|
||||||
if total is None:
|
if total is None:
|
||||||
return None
|
return None
|
||||||
if now is None:
|
if now is None:
|
||||||
@ -339,6 +339,10 @@ class FileDownloader(object):
|
|||||||
def download(self, filename, info_dict):
|
def download(self, filename, info_dict):
|
||||||
"""Download to a filename using the info from info_dict
|
"""Download to a filename using the info from info_dict
|
||||||
Return True on success and False otherwise
|
Return True on success and False otherwise
|
||||||
|
|
||||||
|
This method filters the `Cookie` header from the info_dict to prevent leaks.
|
||||||
|
Downloaders have their own way of handling cookies.
|
||||||
|
See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
|
||||||
"""
|
"""
|
||||||
|
|
||||||
nooverwrites_and_exists = (
|
nooverwrites_and_exists = (
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import compat_urllib_error
|
from ..compat import compat_urllib_error
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -30,25 +32,28 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
fragment_retries = self.params.get('fragment_retries', 0)
|
fragment_retries = self.params.get('fragment_retries', 0)
|
||||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||||
|
|
||||||
frag_index = 0
|
for frag_index, fragment in enumerate(fragments, 1):
|
||||||
for i, fragment in enumerate(fragments):
|
|
||||||
frag_index += 1
|
|
||||||
if frag_index <= ctx['fragment_index']:
|
if frag_index <= ctx['fragment_index']:
|
||||||
continue
|
continue
|
||||||
|
success = False
|
||||||
# In DASH, the first segment contains necessary headers to
|
# In DASH, the first segment contains necessary headers to
|
||||||
# generate a valid MP4 file, so always abort for the first segment
|
# generate a valid MP4 file, so always abort for the first segment
|
||||||
fatal = i == 0 or not skip_unavailable_fragments
|
fatal = frag_index == 1 or not skip_unavailable_fragments
|
||||||
for count in range(fragment_retries + 1):
|
|
||||||
try:
|
|
||||||
fragment_url = fragment.get('url')
|
fragment_url = fragment.get('url')
|
||||||
if not fragment_url:
|
if not fragment_url:
|
||||||
assert fragment_base_url
|
assert fragment_base_url
|
||||||
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
fragment_url = urljoin(fragment_base_url, fragment['path'])
|
||||||
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
|
headers = info_dict.get('http_headers')
|
||||||
|
fragment_range = fragment.get('range')
|
||||||
|
if fragment_range:
|
||||||
|
headers = headers.copy() if headers else {}
|
||||||
|
headers['Range'] = 'bytes=%s' % (fragment_range,)
|
||||||
|
for count in itertools.count():
|
||||||
|
try:
|
||||||
|
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict, headers)
|
||||||
if not success:
|
if not success:
|
||||||
return False
|
return False
|
||||||
self._append_fragment(ctx, frag_content)
|
self._append_fragment(ctx, frag_content)
|
||||||
break
|
|
||||||
except compat_urllib_error.HTTPError as err:
|
except compat_urllib_error.HTTPError as err:
|
||||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||||
# whole download to fail. However if the same fragment is immediately
|
# whole download to fail. However if the same fragment is immediately
|
||||||
@ -58,19 +63,19 @@ class DashSegmentsFD(FragmentFD):
|
|||||||
# HTTP error.
|
# HTTP error.
|
||||||
if count < fragment_retries:
|
if count < fragment_retries:
|
||||||
self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
|
self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
|
||||||
|
continue
|
||||||
except DownloadError:
|
except DownloadError:
|
||||||
# Don't retry fragment if error occurred during HTTP downloading
|
# Don't retry fragment if error occurred during HTTP downloading
|
||||||
# itself since it has own retry settings
|
# itself since it has its own retry settings
|
||||||
if not fatal:
|
if fatal:
|
||||||
self.report_skip_fragment(frag_index)
|
|
||||||
break
|
|
||||||
raise
|
raise
|
||||||
|
break
|
||||||
|
|
||||||
if count >= fragment_retries:
|
if not success:
|
||||||
if not fatal:
|
if not fatal:
|
||||||
self.report_skip_fragment(frag_index)
|
self.report_skip_fragment(frag_index)
|
||||||
continue
|
continue
|
||||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
self.report_error('giving up after %s fragment retries' % count)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
self._finish_frag_download(ctx)
|
self._finish_frag_download(ctx)
|
||||||
|
@ -1,17 +1,24 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import os.path
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_setenv,
|
compat_setenv,
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_subprocess_Popen,
|
||||||
)
|
)
|
||||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
|
||||||
|
try:
|
||||||
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
|
||||||
|
except ImportError:
|
||||||
|
FFmpegPostProcessor = None
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
cli_option,
|
cli_option,
|
||||||
cli_valueless_option,
|
cli_valueless_option,
|
||||||
@ -23,6 +30,8 @@ from ..utils import (
|
|||||||
check_executable,
|
check_executable,
|
||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
process_communicate_or_kill,
|
process_communicate_or_kill,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -30,6 +39,7 @@ class ExternalFD(FileDownloader):
|
|||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
self.report_destination(filename)
|
self.report_destination(filename)
|
||||||
tmpfilename = self.temp_name(filename)
|
tmpfilename = self.temp_name(filename)
|
||||||
|
self._cookies_tempfile = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
started = time.time()
|
started = time.time()
|
||||||
@ -42,6 +52,13 @@ class ExternalFD(FileDownloader):
|
|||||||
# should take place
|
# should take place
|
||||||
retval = 0
|
retval = 0
|
||||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||||
|
finally:
|
||||||
|
if self._cookies_tempfile and os.path.isfile(self._cookies_tempfile):
|
||||||
|
try:
|
||||||
|
os.remove(self._cookies_tempfile)
|
||||||
|
except OSError:
|
||||||
|
self.report_warning(
|
||||||
|
'Unable to delete temporary cookies file "{0}"'.format(self._cookies_tempfile))
|
||||||
|
|
||||||
if retval == 0:
|
if retval == 0:
|
||||||
status = {
|
status = {
|
||||||
@ -97,6 +114,16 @@ class ExternalFD(FileDownloader):
|
|||||||
def _configuration_args(self, default=[]):
|
def _configuration_args(self, default=[]):
|
||||||
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
return cli_configuration_args(self.params, 'external_downloader_args', default)
|
||||||
|
|
||||||
|
def _write_cookies(self):
|
||||||
|
if not self.ydl.cookiejar.filename:
|
||||||
|
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
|
||||||
|
tmp_cookies.close()
|
||||||
|
self._cookies_tempfile = tmp_cookies.name
|
||||||
|
self.to_screen('[download] Writing temporary cookies file to "{0}"'.format(self._cookies_tempfile))
|
||||||
|
# real_download resets _cookies_tempfile; if it's None, save() will write to cookiejar.filename
|
||||||
|
self.ydl.cookiejar.save(self._cookies_tempfile, ignore_discard=True, ignore_expires=True)
|
||||||
|
return self.ydl.cookiejar.filename or self._cookies_tempfile
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
""" Either overwrite this or implement _make_cmd """
|
""" Either overwrite this or implement _make_cmd """
|
||||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||||
@ -110,13 +137,21 @@ class ExternalFD(FileDownloader):
|
|||||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||||
return p.returncode
|
return p.returncode
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _header_items(info_dict):
|
||||||
|
return traverse_obj(
|
||||||
|
info_dict, ('http_headers', T(dict.items), Ellipsis))
|
||||||
|
|
||||||
|
|
||||||
class CurlFD(ExternalFD):
|
class CurlFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '-V'
|
AVAILABLE_OPT = '-V'
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '--location', '-o', tmpfilename]
|
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
|
||||||
for key, val in info_dict['http_headers'].items():
|
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
|
if cookie_header:
|
||||||
|
cmd += ['--cookie', cookie_header]
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
|
||||||
cmd += self._valueless_option('--silent', 'noprogress')
|
cmd += self._valueless_option('--silent', 'noprogress')
|
||||||
@ -151,8 +186,11 @@ class AxelFD(ExternalFD):
|
|||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-o', tmpfilename]
|
cmd = [self.exe, '-o', tmpfilename]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['-H', '%s: %s' % (key, val)]
|
cmd += ['-H', '%s: %s' % (key, val)]
|
||||||
|
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
|
if cookie_header:
|
||||||
|
cmd += ['-H', 'Cookie: {0}'.format(cookie_header), '--max-redirect=0']
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
@ -162,8 +200,10 @@ class WgetFD(ExternalFD):
|
|||||||
AVAILABLE_OPT = '--version'
|
AVAILABLE_OPT = '--version'
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
|
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
|
||||||
for key, val in info_dict['http_headers'].items():
|
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||||
|
cmd += ['--load-cookies', self._write_cookies()]
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--limit-rate', 'ratelimit')
|
cmd += self._option('--limit-rate', 'ratelimit')
|
||||||
retry = self._option('--tries', 'retries')
|
retry = self._option('--tries', 'retries')
|
||||||
@ -172,7 +212,10 @@ class WgetFD(ExternalFD):
|
|||||||
retry[1] = '0'
|
retry[1] = '0'
|
||||||
cmd += retry
|
cmd += retry
|
||||||
cmd += self._option('--bind-address', 'source_address')
|
cmd += self._option('--bind-address', 'source_address')
|
||||||
cmd += self._option('--proxy', 'proxy')
|
proxy = self.params.get('proxy')
|
||||||
|
if proxy:
|
||||||
|
for var in ('http_proxy', 'https_proxy'):
|
||||||
|
cmd += ['--execute', '%s=%s' % (var, proxy)]
|
||||||
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
|
||||||
cmd += self._configuration_args()
|
cmd += self._configuration_args()
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
@ -182,20 +225,57 @@ class WgetFD(ExternalFD):
|
|||||||
class Aria2cFD(ExternalFD):
|
class Aria2cFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '-v'
|
AVAILABLE_OPT = '-v'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _aria2c_filename(fn):
|
||||||
|
return fn if os.path.isabs(fn) else os.path.join('.', fn)
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-c']
|
cmd = [self.exe, '-c',
|
||||||
cmd += self._configuration_args([
|
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
|
||||||
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
|
||||||
dn = os.path.dirname(tmpfilename)
|
if 'fragments' in info_dict:
|
||||||
if dn:
|
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
|
||||||
cmd += ['--dir', dn]
|
else:
|
||||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
cmd += ['--min-split-size', '1M']
|
||||||
for key, val in info_dict['http_headers'].items():
|
|
||||||
|
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||||
|
cmd += ['--load-cookies={0}'.format(self._write_cookies())]
|
||||||
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
|
cmd += self._configuration_args(['--max-connection-per-server', '4'])
|
||||||
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
|
cmd += self._option('--max-overall-download-limit', 'ratelimit')
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||||
|
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
|
||||||
|
cmd += self._configuration_args()
|
||||||
|
|
||||||
|
# aria2c strips out spaces from the beginning/end of filenames and paths.
|
||||||
|
# We work around this issue by adding a "./" to the beginning of the
|
||||||
|
# filename and relative path, and adding a "/" at the end of the path.
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/issues/276
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/20312
|
||||||
|
# https://github.com/aria2/aria2/issues/1373
|
||||||
|
dn = os.path.dirname(tmpfilename)
|
||||||
|
if dn:
|
||||||
|
cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
|
||||||
|
if 'fragments' not in info_dict:
|
||||||
|
cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
|
||||||
|
cmd += ['--auto-file-renaming=false']
|
||||||
|
if 'fragments' in info_dict:
|
||||||
|
cmd += ['--file-allocation=none', '--uri-selector=inorder']
|
||||||
|
url_list_file = '%s.frag.urls' % (tmpfilename, )
|
||||||
|
url_list = []
|
||||||
|
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||||
|
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
|
||||||
|
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||||
|
stream, _ = self.sanitize_open(url_list_file, 'wb')
|
||||||
|
stream.write('\n'.join(url_list).encode())
|
||||||
|
stream.close()
|
||||||
|
cmd += ['-i', self._aria2c_filename(url_list_file)]
|
||||||
|
else:
|
||||||
cmd += ['--', info_dict['url']]
|
cmd += ['--', info_dict['url']]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
@ -235,8 +315,10 @@ class Aria2pFD(ExternalFD):
|
|||||||
}
|
}
|
||||||
options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
|
options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
|
||||||
options['out'] = os.path.basename(tmpfilename)
|
options['out'] = os.path.basename(tmpfilename)
|
||||||
|
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||||
|
options['load-cookies'] = self._write_cookies()
|
||||||
options['header'] = []
|
options['header'] = []
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in self._header_items(info_dict):
|
||||||
options['header'].append('{0}: {1}'.format(key, val))
|
options['header'].append('{0}: {1}'.format(key, val))
|
||||||
download = aria2.add_uris([info_dict['url']], options)
|
download = aria2.add_uris([info_dict['url']], options)
|
||||||
status = {
|
status = {
|
||||||
@ -265,8 +347,16 @@ class HttpieFD(ExternalFD):
|
|||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
|
||||||
for key, val in info_dict['http_headers'].items():
|
for key, val in self._header_items(info_dict):
|
||||||
cmd += ['%s:%s' % (key, val)]
|
cmd += ['%s:%s' % (key, val)]
|
||||||
|
|
||||||
|
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
|
||||||
|
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
|
||||||
|
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
|
||||||
|
# 2: https://httpie.io/docs/cli/sessions
|
||||||
|
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||||
|
if cookie_header:
|
||||||
|
cmd += ['Cookie:%s' % cookie_header]
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
@ -277,13 +367,14 @@ class FFmpegFD(ExternalFD):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def available(cls):
|
def available(cls):
|
||||||
return FFmpegPostProcessor().available
|
# actual availability can only be confirmed for an instance
|
||||||
|
return bool(FFmpegPostProcessor)
|
||||||
|
|
||||||
def _call_downloader(self, tmpfilename, info_dict):
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
url = info_dict['url']
|
# `downloader` means the parent `YoutubeDL`
|
||||||
ffpp = FFmpegPostProcessor(downloader=self)
|
ffpp = FFmpegPostProcessor(downloader=self.ydl)
|
||||||
if not ffpp.available:
|
if not ffpp.available:
|
||||||
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
|
self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.')
|
||||||
return False
|
return False
|
||||||
ffpp.check_version()
|
ffpp.check_version()
|
||||||
|
|
||||||
@ -312,7 +403,15 @@ class FFmpegFD(ExternalFD):
|
|||||||
# if end_time:
|
# if end_time:
|
||||||
# args += ['-t', compat_str(end_time - start_time)]
|
# args += ['-t', compat_str(end_time - start_time)]
|
||||||
|
|
||||||
if info_dict['http_headers'] and re.match(r'^https?://', url):
|
url = info_dict['url']
|
||||||
|
cookies = self.ydl.cookiejar.get_cookies_for_url(url)
|
||||||
|
if cookies:
|
||||||
|
args.extend(['-cookies', ''.join(
|
||||||
|
'{0}={1}; path={2}; domain={3};\r\n'.format(
|
||||||
|
cookie.name, cookie.value, cookie.path, cookie.domain)
|
||||||
|
for cookie in cookies)])
|
||||||
|
|
||||||
|
if info_dict.get('http_headers') and re.match(r'^https?://', url):
|
||||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||||
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
headers = handle_youtubedl_headers(info_dict['http_headers'])
|
||||||
@ -392,7 +491,12 @@ class FFmpegFD(ExternalFD):
|
|||||||
|
|
||||||
self._debug_cmd(args)
|
self._debug_cmd(args)
|
||||||
|
|
||||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
|
# From [1], a PIPE opened in Popen() should be closed, unless
|
||||||
|
# .communicate() is called. Avoid leaking any PIPEs by using Popen
|
||||||
|
# as a context manager (newer Python 3.x and compat)
|
||||||
|
# Fixes "Resource Warning" in test/test_downloader_external.py
|
||||||
|
# [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
|
||||||
|
with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
|
||||||
try:
|
try:
|
||||||
retval = proc.wait()
|
retval = proc.wait()
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
@ -405,7 +509,6 @@ class FFmpegFD(ExternalFD):
|
|||||||
process_communicate_or_kill(proc, b'q')
|
process_communicate_or_kill(proc, b'q')
|
||||||
else:
|
else:
|
||||||
proc.kill()
|
proc.kill()
|
||||||
proc.wait()
|
|
||||||
raise
|
raise
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
@ -141,6 +141,7 @@ class HttpFD(FileDownloader):
|
|||||||
# Content-Range is either not present or invalid. Assuming remote webserver is
|
# Content-Range is either not present or invalid. Assuming remote webserver is
|
||||||
# trying to send the whole file, resume is not possible, so wiping the local file
|
# trying to send the whole file, resume is not possible, so wiping the local file
|
||||||
# and performing entire redownload
|
# and performing entire redownload
|
||||||
|
if range_start > 0:
|
||||||
self.report_unable_to_resume()
|
self.report_unable_to_resume()
|
||||||
ctx.resume_len = 0
|
ctx.resume_len = 0
|
||||||
ctx.open_mode = 'wb'
|
ctx.open_mode = 'wb'
|
||||||
@ -293,7 +294,7 @@ class HttpFD(FileDownloader):
|
|||||||
|
|
||||||
# Progress message
|
# Progress message
|
||||||
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
|
||||||
eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))
|
eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - byte_counter))
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
'status': 'downloading',
|
'status': 'downloading',
|
||||||
|
79
youtube_dl/extractor/caffeine.py
Normal file
79
youtube_dl/extractor/caffeine.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_iso8601,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CaffeineTVIE(InfoExtractor):
    """Extractor for recorded broadcast pages on caffeine.tv."""

    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)'
    _TESTS = [{
        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
        'info_dict': {
            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
            'ext': 'mp4',
            'title': 'GOOOOD MORNINNNNN #highlights',
            'timestamp': 1654702180,
            'upload_date': '20220608',
            'uploader': 'TsuSurf',
            'duration': 3145,
            'age_limit': 17,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video identified by `url`.

        The activity JSON is fetched from the public API; its
        'broadcast_info' member must provide 'broadcast_title' and
        'video_url' (a missing key raises KeyError). All other
        metadata is optional.
        """
        video_id = self._match_id(url)
        activity = self._download_json(
            'https://api.caffeine.tv/social/public/activity/' + video_id,
            video_id)

        # anything that is not a dict is treated as absent
        broadcast = traverse_obj(activity, ('broadcast_info', T(dict))) or {}
        title = broadcast['broadcast_title']
        video_url = broadcast['video_url']

        if determine_ext(video_url) == 'm3u8':
            formats = self._extract_m3u8_formats(
                video_url, video_id, 'mp4', entry_protocol='m3u8',
                fatal=False)
        else:
            # not a HLS manifest: offer the URL as the single format
            formats = [{'url': video_url}]
        self._sort_formats(formats)

        # assume Apple Store ratings [1]
        # 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
        rating_ages = {
            'FOUR_PLUS': 0,
            'NINE_PLUS': 9,
            'TWELVE_PLUS': 12,
            'SEVENTEEN_PLUS': 17,
        }

        def age_limit(rating):
            # a falsy rating is passed through; an unknown one maps to 17
            return rating and rating_ages.get(rating, 17)

        def page_relative(path):
            return urljoin(url, path)

        required = {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
        # first 'username' found at the top level or under 'user';
        # merged ahead of the ('user', 'name') candidate below
        username = traverse_obj(activity, {
            'uploader': ((None, 'user'), 'username'),
        }, get_all=False)
        activity_meta = traverse_obj(activity, {
            'like_count': ('like_count', T(int_or_none)),
            'view_count': ('view_count', T(int_or_none)),
            'comment_count': ('comment_count', T(int_or_none)),
            'tags': ('tags', Ellipsis, T(txt_or_none)),
            'is_live': 'is_live',
            'uploader': ('user', 'name'),
        })
        broadcast_meta = traverse_obj(broadcast, {
            'duration': ('content_duration', T(int_or_none)),
            'timestamp': ('broadcast_start_time', T(parse_iso8601)),
            'thumbnail': ('preview_image_path', T(page_relative)),
            'age_limit': ('content_rating', T(age_limit)),
        })

        return merge_dicts(required, username, activity_meta, broadcast_meta)
|
69
youtube_dl/extractor/clipchamp.py
Normal file
69
youtube_dl/extractor/clipchamp.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
merge_dicts,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClipchampIE(InfoExtractor):
    """Extractor for shared clips at clipchamp.com/watch/<id>."""

    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
        'info_dict': {
            'id': 'gRXZ4ZhdDaU',
            'ext': 'mp4',
            'title': 'Untitled video',
            'uploader': 'Alexander Schwartz',
            'timestamp': 1680805580,
            'upload_date': '20230406',
            'thumbnail': r're:^https?://.+\.jpg',
        },
        'params': {
            'skip_download': 'm3u8',
            'format': 'bestvideo',
        },
    }]

    # filled with (subdomain, stream path, manifest extension)
    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}

    def _real_extract(self, url):
        """Build the info dict for the clip identified by `url`.

        Raises ExtractorError if the clip's 'storage_location' is not
        'cf_stream'. The video record and its 'download_url' are
        mandatory (a missing key raises KeyError).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']

        storage_location = data.get('storage_location')
        if storage_location != 'cf_stream':
            raise ExtractorError('Unsupported clip storage location "%s"' % (storage_location,))

        path = data['download_url']
        iframe = self._download_webpage(
            'https://iframe.cloudflarestream.com/' + path, video_id, 'Downloading player iframe')
        # fall back to a fixed customer prefix if the iframe doesn't show one
        subdomain = self._search_regex(
            r'''\bcustomer-domain-prefix\s*=\s*("|')(?P<sd>[\w-]+)\1''', iframe,
            'subdomain', group='sd', fatal=False) or 'customer-2ut9yn3y6fta1yxe'

        # both manifests are optional (fatal=False): either may yield nothing
        formats = self._extract_mpd_formats(
            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
        formats.extend(self._extract_m3u8_formats(
            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
        # DASH and HLS entries are simply concatenated above; order them
        # by quality so that format selection (eg, 'bestvideo') doesn't
        # depend on manifest order
        self._sort_formats(formats)

        return merge_dicts({
            'id': video_id,
            'formats': formats,
            # whichever of first and last name are present, space-separated
            'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), T(compat_str)))) or None,
        }, traverse_obj(data, {
            'title': ('project', 'project_name', T(compat_str)),
            'timestamp': ('created_at', T(unified_timestamp)),
            'thumbnail': ('thumbnail_url', T(url_or_none)),
        }), rev=True)
|
@ -2,7 +2,9 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import collections
|
||||||
import datetime
|
import datetime
|
||||||
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import netrc
|
import netrc
|
||||||
@ -23,6 +25,9 @@ from ..compat import (
|
|||||||
compat_getpass,
|
compat_getpass,
|
||||||
compat_integer_types,
|
compat_integer_types,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_kwargs,
|
||||||
|
compat_map as map,
|
||||||
|
compat_open as open,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
@ -31,6 +36,7 @@ from ..compat import (
|
|||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_xml_parse_error,
|
compat_xml_parse_error,
|
||||||
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
from ..downloader.f4m import (
|
from ..downloader.f4m import (
|
||||||
get_base_url,
|
get_base_url,
|
||||||
@ -54,6 +60,7 @@ from ..utils import (
|
|||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
GeoUtils,
|
GeoUtils,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
JSON_LD_RE,
|
JSON_LD_RE,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
@ -70,6 +77,8 @@ from ..utils import (
|
|||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
@ -79,6 +88,7 @@ from ..utils import (
|
|||||||
urljoin,
|
urljoin,
|
||||||
url_basename,
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
variadic,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
@ -174,6 +184,8 @@ class InfoExtractor(object):
|
|||||||
fragment_base_url
|
fragment_base_url
|
||||||
* "duration" (optional, int or float)
|
* "duration" (optional, int or float)
|
||||||
* "filesize" (optional, int)
|
* "filesize" (optional, int)
|
||||||
|
* "range" (optional, str of the form "start-end"
|
||||||
|
to use in HTTP Range header)
|
||||||
* preference Order number of this format. If this field is
|
* preference Order number of this format. If this field is
|
||||||
present and not None, the formats get sorted
|
present and not None, the formats get sorted
|
||||||
by this field, regardless of all other values.
|
by this field, regardless of all other values.
|
||||||
@ -367,9 +379,22 @@ class InfoExtractor(object):
|
|||||||
title, description etc.
|
title, description etc.
|
||||||
|
|
||||||
|
|
||||||
Subclasses of this one should re-define the _real_initialize() and
|
A subclass of InfoExtractor must be defined to handle each specific site (or
|
||||||
_real_extract() methods and define a _VALID_URL regexp.
|
several sites). Such a concrete subclass should be added to the list of
|
||||||
Probably, they should also be added to the list of extractors.
|
extractors. It should also:
|
||||||
|
* define its _VALID_URL attribute as a regexp, or a Sequence of alternative
|
||||||
|
regexps (but see below)
|
||||||
|
* re-define the _real_extract() method
|
||||||
|
* optionally re-define the _real_initialize() method.
|
||||||
|
|
||||||
|
An extractor subclass may also override suitable() if necessary, but the
|
||||||
|
function signature must be preserved and the function must import everything
|
||||||
|
it needs (except other extractors), so that lazy_extractors works correctly.
|
||||||
|
If the subclass's suitable() and _real_extract() functions avoid using
|
||||||
|
_VALID_URL, the subclass need not set that class attribute.
|
||||||
|
|
||||||
|
An abstract subclass of InfoExtractor may be used to simplify implementation
|
||||||
|
within an extractor module; it should not be added to the list of extractors.
|
||||||
|
|
||||||
_GEO_BYPASS attribute may be set to False in order to disable
|
_GEO_BYPASS attribute may be set to False in order to disable
|
||||||
geo restriction bypass mechanisms for a particular extractor.
|
geo restriction bypass mechanisms for a particular extractor.
|
||||||
@ -404,22 +429,33 @@ class InfoExtractor(object):
|
|||||||
self._x_forwarded_for_ip = None
|
self._x_forwarded_for_ip = None
|
||||||
self.set_downloader(downloader)
|
self.set_downloader(downloader)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def __match_valid_url(cls, url):
|
||||||
|
# This does not use has/getattr intentionally - we want to know whether
|
||||||
|
# we have cached the regexp for cls, whereas getattr would also
|
||||||
|
# match its superclass
|
||||||
|
if '_VALID_URL_RE' not in cls.__dict__:
|
||||||
|
# _VALID_URL can now be a list/tuple of patterns
|
||||||
|
cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
|
||||||
|
# 20% faster than next(filter(None, (p.match(url) for p in cls._VALID_URL_RE)), None) in 2.7
|
||||||
|
for p in cls._VALID_URL_RE:
|
||||||
|
p = p.match(url)
|
||||||
|
if p:
|
||||||
|
return p
|
||||||
|
|
||||||
|
# The public alias can safely be overridden, as in some back-ports
|
||||||
|
_match_valid_url = __match_valid_url
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
|
# This function must import everything it needs (except other extractors),
|
||||||
# This does not use has/getattr intentionally - we want to know whether
|
# so that lazy_extractors works correctly
|
||||||
# we have cached the regexp for *this* class, whereas getattr would also
|
return cls.__match_valid_url(url) is not None
|
||||||
# match the superclass
|
|
||||||
if '_VALID_URL_RE' not in cls.__dict__:
|
|
||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
|
||||||
return cls._VALID_URL_RE.match(url) is not None
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _match_id(cls, url):
|
def _match_id(cls, url):
|
||||||
if '_VALID_URL_RE' not in cls.__dict__:
|
m = cls.__match_valid_url(url)
|
||||||
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
|
|
||||||
m = cls._VALID_URL_RE.match(url)
|
|
||||||
assert m
|
assert m
|
||||||
return compat_str(m.group('id'))
|
return compat_str(m.group('id'))
|
||||||
|
|
||||||
@ -566,6 +602,14 @@ class InfoExtractor(object):
|
|||||||
"""Sets the downloader for this IE."""
|
"""Sets the downloader for this IE."""
|
||||||
self._downloader = downloader
|
self._downloader = downloader
|
||||||
|
|
||||||
|
@property
|
||||||
|
def cache(self):
|
||||||
|
return self._downloader.cache
|
||||||
|
|
||||||
|
@property
|
||||||
|
def cookiejar(self):
|
||||||
|
return self._downloader.cookiejar
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
"""Real initialization process. Redefine in subclasses."""
|
"""Real initialization process. Redefine in subclasses."""
|
||||||
pass
|
pass
|
||||||
@ -912,14 +956,47 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
self.report_warning(errmsg + str(ve))
|
self.report_warning(errmsg + str(ve))
|
||||||
|
|
||||||
def report_warning(self, msg, video_id=None):
|
def __ie_msg(self, *msg):
|
||||||
|
return '[{0}] {1}'.format(self.IE_NAME, ''.join(msg))
|
||||||
|
|
||||||
|
# msg, video_id=None, *args, only_once=False, **kwargs
|
||||||
|
def report_warning(self, msg, *args, **kwargs):
|
||||||
|
if len(args) > 0:
|
||||||
|
video_id = args[0]
|
||||||
|
args = args[1:]
|
||||||
|
else:
|
||||||
|
video_id = kwargs.pop('video_id', None)
|
||||||
idstr = '' if video_id is None else '%s: ' % video_id
|
idstr = '' if video_id is None else '%s: ' % video_id
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'[%s] %s%s' % (self.IE_NAME, idstr, msg))
|
self.__ie_msg(idstr, msg), *args, **kwargs)
|
||||||
|
|
||||||
def to_screen(self, msg):
|
def to_screen(self, msg):
|
||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
|
self._downloader.to_screen(self.__ie_msg(msg))
|
||||||
|
|
||||||
|
def write_debug(self, msg, only_once=False, _cache=[]):
|
||||||
|
'''Log debug message or Print message to stderr'''
|
||||||
|
if not self.get_param('verbose', False):
|
||||||
|
return
|
||||||
|
message = '[debug] ' + self.__ie_msg(msg)
|
||||||
|
logger = self.get_param('logger')
|
||||||
|
if logger:
|
||||||
|
logger.debug(message)
|
||||||
|
else:
|
||||||
|
if only_once and hash(message) in _cache:
|
||||||
|
return
|
||||||
|
self._downloader.to_stderr(message)
|
||||||
|
_cache.append(hash(message))
|
||||||
|
|
||||||
|
# name, default=None, *args, **kwargs
|
||||||
|
def get_param(self, name, *args, **kwargs):
|
||||||
|
default, args = (args[0], args[1:]) if len(args) > 0 else (kwargs.pop('default', None), args)
|
||||||
|
if self._downloader:
|
||||||
|
return self._downloader.params.get(name, default, *args, **kwargs)
|
||||||
|
return default
|
||||||
|
|
||||||
|
def report_drm(self, video_id):
|
||||||
|
self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
|
||||||
|
|
||||||
def report_extraction(self, id_or_name):
|
def report_extraction(self, id_or_name):
|
||||||
"""Report information extraction."""
|
"""Report information extraction."""
|
||||||
@ -947,6 +1024,15 @@ class InfoExtractor(object):
|
|||||||
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
|
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
|
||||||
raise GeoRestrictedError(msg, countries=countries)
|
raise GeoRestrictedError(msg, countries=countries)
|
||||||
|
|
||||||
|
def raise_no_formats(self, msg, expected=False, video_id=None):
|
||||||
|
if expected and (
|
||||||
|
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
|
||||||
|
self.report_warning(msg, video_id)
|
||||||
|
elif isinstance(msg, ExtractorError):
|
||||||
|
raise msg
|
||||||
|
else:
|
||||||
|
raise ExtractorError(msg, expected=expected, video_id=video_id)
|
||||||
|
|
||||||
# Methods for following #608
|
# Methods for following #608
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||||
@ -1005,6 +1091,8 @@ class InfoExtractor(object):
|
|||||||
if group is None:
|
if group is None:
|
||||||
# return the first matching group
|
# return the first matching group
|
||||||
return next(g for g in mobj.groups() if g is not None)
|
return next(g for g in mobj.groups() if g is not None)
|
||||||
|
elif isinstance(group, (list, tuple)):
|
||||||
|
return tuple(mobj.group(g) for g in group)
|
||||||
else:
|
else:
|
||||||
return mobj.group(group)
|
return mobj.group(group)
|
||||||
elif default is not NO_DEFAULT:
|
elif default is not NO_DEFAULT:
|
||||||
@ -1015,23 +1103,76 @@ class InfoExtractor(object):
|
|||||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
|
||||||
|
"""Searches string for the JSON object specified by start_pattern"""
|
||||||
|
|
||||||
|
# self, start_pattern, string, name, video_id, *, end_pattern='',
|
||||||
|
# contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT
|
||||||
|
# NB: end_pattern is only used to reduce the size of the initial match
|
||||||
|
end_pattern = kwargs.pop('end_pattern', '')
|
||||||
|
# (?:[\s\S]) simulates (?(s):.) (eg)
|
||||||
|
contains_pattern = kwargs.pop('contains_pattern', r'{[\s\S]+}')
|
||||||
|
fatal = kwargs.pop('fatal', True)
|
||||||
|
default = kwargs.pop('default', NO_DEFAULT)
|
||||||
|
|
||||||
|
if default is NO_DEFAULT:
|
||||||
|
default, has_default = {}, False
|
||||||
|
else:
|
||||||
|
fatal, has_default = False, True
|
||||||
|
|
||||||
|
json_string = self._search_regex(
|
||||||
|
r'(?:{0})\s*(?P<json>{1})\s*(?:{2})'.format(
|
||||||
|
start_pattern, contains_pattern, end_pattern),
|
||||||
|
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
|
||||||
|
if not json_string:
|
||||||
|
return default
|
||||||
|
|
||||||
|
# yt-dlp has a special JSON parser that allows trailing text.
|
||||||
|
# Until that arrives here, the diagnostic from the exception
|
||||||
|
# raised by json.loads() is used to extract the wanted text.
|
||||||
|
# Either way, it's a problem if a transform_source() can't
|
||||||
|
# handle the trailing text.
|
||||||
|
|
||||||
|
# force an exception
|
||||||
|
kwargs['fatal'] = True
|
||||||
|
|
||||||
|
# self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
|
||||||
|
for _ in range(2):
|
||||||
|
try:
|
||||||
|
# return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
|
||||||
|
transform_source = kwargs.pop('transform_source', None)
|
||||||
|
if transform_source:
|
||||||
|
json_string = transform_source(json_string)
|
||||||
|
return self._parse_json(json_string, video_id, **compat_kwargs(kwargs))
|
||||||
|
except ExtractorError as e:
|
||||||
|
end = int_or_none(self._search_regex(r'\(char\s+(\d+)', error_to_compat_str(e), 'end', default=None))
|
||||||
|
if end is not None:
|
||||||
|
json_string = json_string[:end]
|
||||||
|
continue
|
||||||
|
msg = 'Unable to extract {0} - Failed to parse JSON'.format(name)
|
||||||
|
if fatal:
|
||||||
|
raise ExtractorError(msg, cause=e.cause, video_id=video_id)
|
||||||
|
elif not has_default:
|
||||||
|
self.report_warning(
|
||||||
|
'{0}: {1}'.format(msg, error_to_compat_str(e)), video_id=video_id)
|
||||||
|
return default
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||||
"""
|
"""
|
||||||
res = self._search_regex(pattern, string, name, default, fatal, flags, group)
|
res = self._search_regex(pattern, string, name, default, fatal, flags, group)
|
||||||
if res:
|
if isinstance(res, tuple):
|
||||||
return clean_html(res).strip()
|
return tuple(map(clean_html, res))
|
||||||
else:
|
return clean_html(res)
|
||||||
return res
|
|
||||||
|
|
||||||
def _get_netrc_login_info(self, netrc_machine=None):
|
def _get_netrc_login_info(self, netrc_machine=None):
|
||||||
username = None
|
username = None
|
||||||
password = None
|
password = None
|
||||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
|
||||||
|
|
||||||
if self._downloader.params.get('usenetrc', False):
|
if self._downloader.params.get('usenetrc', False):
|
||||||
try:
|
try:
|
||||||
|
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||||
info = netrc.netrc().authenticators(netrc_machine)
|
info = netrc.netrc().authenticators(netrc_machine)
|
||||||
if info is not None:
|
if info is not None:
|
||||||
username = info[0]
|
username = info[0]
|
||||||
@ -1039,7 +1180,7 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
raise netrc.NetrcParseError(
|
raise netrc.NetrcParseError(
|
||||||
'No authenticators for %s' % netrc_machine)
|
'No authenticators for %s' % netrc_machine)
|
||||||
except (IOError, netrc.NetrcParseError) as err:
|
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
||||||
self._downloader.report_warning(
|
self._downloader.report_warning(
|
||||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||||
|
|
||||||
@ -1348,6 +1489,48 @@ class InfoExtractor(object):
|
|||||||
break
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
|
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||||
|
# ..., *, transform_source=None, fatal=True, default=NO_DEFAULT
|
||||||
|
|
||||||
|
# TODO: remove this backward compat
|
||||||
|
default = kw.get('default', NO_DEFAULT)
|
||||||
|
if default == '{}':
|
||||||
|
kw['default'] = {}
|
||||||
|
kw = compat_kwargs(kw)
|
||||||
|
|
||||||
|
return self._search_json(
|
||||||
|
r'''<script\s[^>]*?\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>''',
|
||||||
|
webpage, 'next.js data', video_id, end_pattern='</script>',
|
||||||
|
**kw)
|
||||||
|
|
||||||
|
def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
|
||||||
|
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||||
|
|
||||||
|
# self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)
|
||||||
|
context_name = args[0] if len(args) > 0 else kwargs.get('context_name', '__NUXT__')
|
||||||
|
fatal = kwargs.get('fatal', True)
|
||||||
|
traverse = kwargs.get('traverse', ('data', 0))
|
||||||
|
|
||||||
|
re_ctx = re.escape(context_name)
|
||||||
|
|
||||||
|
FUNCTION_RE = (r'\(\s*function\s*\((?P<arg_keys>[\s\S]*?)\)\s*\{\s*'
|
||||||
|
r'return\s+(?P<js>\{[\s\S]*?})\s*;?\s*}\s*\((?P<arg_vals>[\s\S]*?)\)')
|
||||||
|
|
||||||
|
js, arg_keys, arg_vals = self._search_regex(
|
||||||
|
(p.format(re_ctx, FUNCTION_RE) for p in
|
||||||
|
(r'<script>\s*window\s*\.\s*{0}\s*=\s*{1}\s*\)\s*;?\s*</script>',
|
||||||
|
r'{0}\s*\([\s\S]*?{1}')),
|
||||||
|
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
|
||||||
|
default=NO_DEFAULT if fatal else (None, None, None))
|
||||||
|
if js is None:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
|
||||||
|
'[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
|
||||||
|
|
||||||
|
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
||||||
|
return traverse_obj(ret, traverse) or {}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _hidden_inputs(html):
|
def _hidden_inputs(html):
|
||||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||||
@ -1632,6 +1815,12 @@ class InfoExtractor(object):
|
|||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _report_ignoring_subs(self, name):
|
||||||
|
self.report_warning(bug_reports_message(
|
||||||
|
'Ignoring subtitle tracks found in the {0} manifest; '
|
||||||
|
'if any subtitle tracks are missing,'.format(name)
|
||||||
|
), only_once=True)
|
||||||
|
|
||||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||||
entry_protocol='m3u8', preference=None,
|
entry_protocol='m3u8', preference=None,
|
||||||
m3u8_id=None, note=None, errnote=None,
|
m3u8_id=None, note=None, errnote=None,
|
||||||
@ -2072,23 +2261,46 @@ class InfoExtractor(object):
|
|||||||
})
|
})
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
def _extract_mpd_formats(self, *args, **kwargs):
|
||||||
|
fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
|
||||||
|
if subs:
|
||||||
|
self._report_ignoring_subs('DASH')
|
||||||
|
return fmts
|
||||||
|
|
||||||
|
def _extract_mpd_formats_and_subtitles(
|
||||||
|
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
|
||||||
|
fatal=True, data=None, headers=None, query=None):
|
||||||
|
|
||||||
|
# TODO: or not? param not yet implemented
|
||||||
|
if self.get_param('ignore_no_formats_error'):
|
||||||
|
fatal = False
|
||||||
|
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
mpd_url, video_id,
|
mpd_url, video_id,
|
||||||
note=note or 'Downloading MPD manifest',
|
note='Downloading MPD manifest' if note is None else note,
|
||||||
errnote=errnote or 'Failed to download MPD manifest',
|
errnote='Failed to download MPD manifest' if errnote is None else errnote,
|
||||||
fatal=fatal, data=data, headers=headers, query=query)
|
fatal=fatal, data=data, headers=headers or {}, query=query or {})
|
||||||
if res is False:
|
if res is False:
|
||||||
return []
|
return [], {}
|
||||||
mpd_doc, urlh = res
|
mpd_doc, urlh = res
|
||||||
if mpd_doc is None:
|
if mpd_doc is None:
|
||||||
return []
|
return [], {}
|
||||||
mpd_base_url = base_url(urlh.geturl())
|
|
||||||
|
|
||||||
return self._parse_mpd_formats(
|
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||||
|
mpd_url = urlh.geturl()
|
||||||
|
mpd_base_url = base_url(mpd_url)
|
||||||
|
|
||||||
|
return self._parse_mpd_formats_and_subtitles(
|
||||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||||
|
|
||||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
def _parse_mpd_formats(self, *args, **kwargs):
|
||||||
|
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
|
||||||
|
if subs:
|
||||||
|
self._report_ignoring_subs('DASH')
|
||||||
|
return fmts
|
||||||
|
|
||||||
|
def _parse_mpd_formats_and_subtitles(
|
||||||
|
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||||
"""
|
"""
|
||||||
Parse formats from MPD manifest.
|
Parse formats from MPD manifest.
|
||||||
References:
|
References:
|
||||||
@ -2096,8 +2308,10 @@ class InfoExtractor(object):
|
|||||||
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
|
||||||
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||||
"""
|
"""
|
||||||
|
# TODO: param not yet implemented: default like previous yt-dl logic
|
||||||
|
if not self.get_param('dynamic_mpd', False):
|
||||||
if mpd_doc.get('type') == 'dynamic':
|
if mpd_doc.get('type') == 'dynamic':
|
||||||
return []
|
return [], {}
|
||||||
|
|
||||||
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
|
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
|
||||||
|
|
||||||
@ -2107,8 +2321,24 @@ class InfoExtractor(object):
|
|||||||
def is_drm_protected(element):
|
def is_drm_protected(element):
|
||||||
return element.find(_add_ns('ContentProtection')) is not None
|
return element.find(_add_ns('ContentProtection')) is not None
|
||||||
|
|
||||||
|
from ..utils import YoutubeDLHandler
|
||||||
|
fix_path = YoutubeDLHandler._fix_path
|
||||||
|
|
||||||
|
def resolve_base_url(element, parent_base_url=None):
|
||||||
|
# TODO: use native XML traversal when ready
|
||||||
|
b_url = traverse_obj(element, (
|
||||||
|
T(lambda e: e.find(_add_ns('BaseURL')).text)))
|
||||||
|
if parent_base_url and b_url:
|
||||||
|
if not parent_base_url[-1] in ('/', ':'):
|
||||||
|
parent_base_url += '/'
|
||||||
|
b_url = compat_urlparse.urljoin(parent_base_url, b_url)
|
||||||
|
if b_url:
|
||||||
|
b_url = fix_path(b_url)
|
||||||
|
return b_url or parent_base_url
|
||||||
|
|
||||||
def extract_multisegment_info(element, ms_parent_info):
|
def extract_multisegment_info(element, ms_parent_info):
|
||||||
ms_info = ms_parent_info.copy()
|
ms_info = ms_parent_info.copy()
|
||||||
|
base_url = ms_info['base_url'] = resolve_base_url(element, ms_info.get('base_url'))
|
||||||
|
|
||||||
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
|
||||||
# common attributes and elements. We will only extract relevant
|
# common attributes and elements. We will only extract relevant
|
||||||
@ -2142,15 +2372,27 @@ class InfoExtractor(object):
|
|||||||
def extract_Initialization(source):
|
def extract_Initialization(source):
|
||||||
initialization = source.find(_add_ns('Initialization'))
|
initialization = source.find(_add_ns('Initialization'))
|
||||||
if initialization is not None:
|
if initialization is not None:
|
||||||
ms_info['initialization_url'] = initialization.attrib['sourceURL']
|
ms_info['initialization_url'] = initialization.get('sourceURL') or base_url
|
||||||
|
initialization_url_range = initialization.get('range')
|
||||||
|
if initialization_url_range:
|
||||||
|
ms_info['initialization_url_range'] = initialization_url_range
|
||||||
|
|
||||||
segment_list = element.find(_add_ns('SegmentList'))
|
segment_list = element.find(_add_ns('SegmentList'))
|
||||||
if segment_list is not None:
|
if segment_list is not None:
|
||||||
extract_common(segment_list)
|
extract_common(segment_list)
|
||||||
extract_Initialization(segment_list)
|
extract_Initialization(segment_list)
|
||||||
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
|
||||||
if segment_urls_e:
|
segment_urls = traverse_obj(segment_urls_e, (
|
||||||
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
|
Ellipsis, T(lambda e: e.attrib), 'media'))
|
||||||
|
if segment_urls:
|
||||||
|
ms_info['segment_urls'] = segment_urls
|
||||||
|
segment_urls_range = traverse_obj(segment_urls_e, (
|
||||||
|
Ellipsis, T(lambda e: e.attrib), 'mediaRange',
|
||||||
|
T(lambda r: re.findall(r'^\d+-\d+$', r)), 0))
|
||||||
|
if segment_urls_range:
|
||||||
|
ms_info['segment_urls_range'] = segment_urls_range
|
||||||
|
if not segment_urls:
|
||||||
|
ms_info['segment_urls'] = [base_url for _ in segment_urls_range]
|
||||||
else:
|
else:
|
||||||
segment_template = element.find(_add_ns('SegmentTemplate'))
|
segment_template = element.find(_add_ns('SegmentTemplate'))
|
||||||
if segment_template is not None:
|
if segment_template is not None:
|
||||||
@ -2166,17 +2408,20 @@ class InfoExtractor(object):
|
|||||||
return ms_info
|
return ms_info
|
||||||
|
|
||||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||||
formats = []
|
formats, subtitles = [], {}
|
||||||
|
stream_numbers = collections.defaultdict(int)
|
||||||
|
mpd_base_url = resolve_base_url(mpd_doc, mpd_base_url or mpd_url)
|
||||||
for period in mpd_doc.findall(_add_ns('Period')):
|
for period in mpd_doc.findall(_add_ns('Period')):
|
||||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||||
period_ms_info = extract_multisegment_info(period, {
|
period_ms_info = extract_multisegment_info(period, {
|
||||||
'start_number': 1,
|
'start_number': 1,
|
||||||
'timescale': 1,
|
'timescale': 1,
|
||||||
|
'base_url': mpd_base_url,
|
||||||
})
|
})
|
||||||
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
|
for adaptation_set in period.findall(_add_ns('AdaptationSet')):
|
||||||
if is_drm_protected(adaptation_set):
|
if is_drm_protected(adaptation_set):
|
||||||
continue
|
continue
|
||||||
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
|
adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
|
||||||
for representation in adaptation_set.findall(_add_ns('Representation')):
|
for representation in adaptation_set.findall(_add_ns('Representation')):
|
||||||
if is_drm_protected(representation):
|
if is_drm_protected(representation):
|
||||||
continue
|
continue
|
||||||
@ -2184,27 +2429,35 @@ class InfoExtractor(object):
|
|||||||
representation_attrib.update(representation.attrib)
|
representation_attrib.update(representation.attrib)
|
||||||
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
|
||||||
mime_type = representation_attrib['mimeType']
|
mime_type = representation_attrib['mimeType']
|
||||||
content_type = mime_type.split('/')[0]
|
content_type = representation_attrib.get('contentType') or mime_type.split('/')[0]
|
||||||
if content_type == 'text':
|
codec_str = representation_attrib.get('codecs', '')
|
||||||
# TODO implement WebVTT downloading
|
# Some kind of binary subtitle found in some youtube livestreams
|
||||||
pass
|
if mime_type == 'application/x-rawcc':
|
||||||
elif content_type in ('video', 'audio'):
|
codecs = {'scodec': codec_str}
|
||||||
base_url = ''
|
else:
|
||||||
for element in (representation, adaptation_set, period, mpd_doc):
|
codecs = parse_codecs(codec_str)
|
||||||
base_url_e = element.find(_add_ns('BaseURL'))
|
if content_type not in ('video', 'audio', 'text'):
|
||||||
if base_url_e is not None:
|
if mime_type == 'image/jpeg':
|
||||||
base_url = base_url_e.text + base_url
|
content_type = mime_type
|
||||||
if re.match(r'^https?://', base_url):
|
elif codecs.get('vcodec', 'none') != 'none':
|
||||||
break
|
content_type = 'video'
|
||||||
if mpd_base_url and not re.match(r'^https?://', base_url):
|
elif codecs.get('acodec', 'none') != 'none':
|
||||||
if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
|
content_type = 'audio'
|
||||||
mpd_base_url += '/'
|
elif codecs.get('scodec', 'none') != 'none':
|
||||||
base_url = mpd_base_url + base_url
|
content_type = 'text'
|
||||||
|
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
|
||||||
|
content_type = 'text'
|
||||||
|
else:
|
||||||
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||||
|
continue
|
||||||
|
|
||||||
representation_id = representation_attrib.get('id')
|
representation_id = representation_attrib.get('id')
|
||||||
lang = representation_attrib.get('lang')
|
lang = representation_attrib.get('lang')
|
||||||
url_el = representation.find(_add_ns('BaseURL'))
|
url_el = representation.find(_add_ns('BaseURL'))
|
||||||
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
|
||||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||||
|
format_id = join_nonempty(representation_id or content_type, mpd_id)
|
||||||
|
if content_type in ('video', 'audio'):
|
||||||
f = {
|
f = {
|
||||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||||
'manifest_url': mpd_url,
|
'manifest_url': mpd_url,
|
||||||
@ -2219,8 +2472,27 @@ class InfoExtractor(object):
|
|||||||
'filesize': filesize,
|
'filesize': filesize,
|
||||||
'container': mimetype2ext(mime_type) + '_dash',
|
'container': mimetype2ext(mime_type) + '_dash',
|
||||||
}
|
}
|
||||||
f.update(parse_codecs(representation_attrib.get('codecs')))
|
f.update(codecs)
|
||||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
elif content_type == 'text':
|
||||||
|
f = {
|
||||||
|
'ext': mimetype2ext(mime_type),
|
||||||
|
'manifest_url': mpd_url,
|
||||||
|
'filesize': filesize,
|
||||||
|
}
|
||||||
|
elif content_type == 'image/jpeg':
|
||||||
|
# See test case in VikiIE
|
||||||
|
# https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
|
||||||
|
f = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'ext': 'mhtml',
|
||||||
|
'manifest_url': mpd_url,
|
||||||
|
'format_note': 'DASH storyboards (jpeg)',
|
||||||
|
'acodec': 'none',
|
||||||
|
'vcodec': 'none',
|
||||||
|
}
|
||||||
|
if is_drm_protected(adaptation_set) or is_drm_protected(representation):
|
||||||
|
f['has_drm'] = True
|
||||||
|
representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info)
|
||||||
|
|
||||||
def prepare_template(template_name, identifiers):
|
def prepare_template(template_name, identifiers):
|
||||||
tmpl = representation_ms_info[template_name]
|
tmpl = representation_ms_info[template_name]
|
||||||
@ -2261,6 +2533,11 @@ class InfoExtractor(object):
|
|||||||
def location_key(location):
|
def location_key(location):
|
||||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
return 'url' if re.match(r'^https?://', location) else 'path'
|
||||||
|
|
||||||
|
def calc_segment_duration():
|
||||||
|
return float_or_none(
|
||||||
|
representation_ms_info['segment_duration'],
|
||||||
|
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
|
||||||
|
|
||||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||||
|
|
||||||
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
|
||||||
@ -2272,7 +2549,8 @@ class InfoExtractor(object):
|
|||||||
segment_duration = None
|
segment_duration = None
|
||||||
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
|
||||||
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
|
||||||
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
|
representation_ms_info['total_number'] = int(math.ceil(
|
||||||
|
float_or_none(period_duration, segment_duration, default=0)))
|
||||||
representation_ms_info['fragments'] = [{
|
representation_ms_info['fragments'] = [{
|
||||||
media_location_key: media_template % {
|
media_location_key: media_template % {
|
||||||
'Number': segment_number,
|
'Number': segment_number,
|
||||||
@ -2312,11 +2590,12 @@ class InfoExtractor(object):
|
|||||||
add_segment_url()
|
add_segment_url()
|
||||||
segment_number += 1
|
segment_number += 1
|
||||||
segment_time += segment_d
|
segment_time += segment_d
|
||||||
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
|
elif 'segment_urls' in representation_ms_info:
|
||||||
|
fragments = []
|
||||||
|
if 's' in representation_ms_info:
|
||||||
# No media template
|
# No media template
|
||||||
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
|
||||||
# or any YouTube dashsegments video
|
# or any YouTube dashsegments video
|
||||||
fragments = []
|
|
||||||
segment_index = 0
|
segment_index = 0
|
||||||
timescale = representation_ms_info['timescale']
|
timescale = representation_ms_info['timescale']
|
||||||
for s in representation_ms_info['s']:
|
for s in representation_ms_info['s']:
|
||||||
@ -2328,28 +2607,37 @@ class InfoExtractor(object):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
})
|
})
|
||||||
segment_index += 1
|
segment_index += 1
|
||||||
representation_ms_info['fragments'] = fragments
|
elif 'segment_urls_range' in representation_ms_info:
|
||||||
elif 'segment_urls' in representation_ms_info:
|
# Segment URLs with mediaRange
|
||||||
|
# Example: https://kinescope.io/200615537/master.mpd
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/30235
|
||||||
|
# or any mpd generated with Bento4 `mp4dash --no-split --use-segment-list`
|
||||||
|
segment_duration = calc_segment_duration()
|
||||||
|
for segment_url, segment_url_range in zip(
|
||||||
|
representation_ms_info['segment_urls'], representation_ms_info['segment_urls_range']):
|
||||||
|
fragments.append({
|
||||||
|
location_key(segment_url): segment_url,
|
||||||
|
'range': segment_url_range,
|
||||||
|
'duration': segment_duration,
|
||||||
|
})
|
||||||
|
else:
|
||||||
# Segment URLs with no SegmentTimeline
|
# Segment URLs with no SegmentTimeline
|
||||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||||
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||||
fragments = []
|
segment_duration = calc_segment_duration()
|
||||||
segment_duration = float_or_none(
|
|
||||||
representation_ms_info['segment_duration'],
|
|
||||||
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
|
|
||||||
for segment_url in representation_ms_info['segment_urls']:
|
for segment_url in representation_ms_info['segment_urls']:
|
||||||
fragment = {
|
fragments.append({
|
||||||
location_key(segment_url): segment_url,
|
location_key(segment_url): segment_url,
|
||||||
}
|
'duration': segment_duration,
|
||||||
if segment_duration:
|
})
|
||||||
fragment['duration'] = segment_duration
|
|
||||||
fragments.append(fragment)
|
|
||||||
representation_ms_info['fragments'] = fragments
|
representation_ms_info['fragments'] = fragments
|
||||||
|
|
||||||
# If there is a fragments key available then we correctly recognized fragmented media.
|
# If there is a fragments key available then we correctly recognized fragmented media.
|
||||||
# Otherwise we will assume unfragmented media with direct access. Technically, such
|
# Otherwise we will assume unfragmented media with direct access. Technically, such
|
||||||
# assumption is not necessarily correct since we may simply have no support for
|
# assumption is not necessarily correct since we may simply have no support for
|
||||||
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
|
# some forms of fragmented media renditions yet, but for now we'll use this fallback.
|
||||||
if 'fragments' in representation_ms_info:
|
if 'fragments' in representation_ms_info:
|
||||||
|
base_url = representation_ms_info['base_url']
|
||||||
f.update({
|
f.update({
|
||||||
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||||
'url': mpd_url or base_url,
|
'url': mpd_url or base_url,
|
||||||
@ -2357,19 +2645,40 @@ class InfoExtractor(object):
|
|||||||
'fragments': [],
|
'fragments': [],
|
||||||
'protocol': 'http_dash_segments',
|
'protocol': 'http_dash_segments',
|
||||||
})
|
})
|
||||||
if 'initialization_url' in representation_ms_info:
|
if 'initialization_url' in representation_ms_info and 'initialization_url_range' in representation_ms_info:
|
||||||
|
# Initialization URL with range (accompanied by Segment URLs with mediaRange above)
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/30235
|
||||||
|
initialization_url = representation_ms_info['initialization_url']
|
||||||
|
f['fragments'].append({
|
||||||
|
location_key(initialization_url): initialization_url,
|
||||||
|
'range': representation_ms_info['initialization_url_range'],
|
||||||
|
})
|
||||||
|
elif 'initialization_url' in representation_ms_info:
|
||||||
initialization_url = representation_ms_info['initialization_url']
|
initialization_url = representation_ms_info['initialization_url']
|
||||||
if not f.get('url'):
|
if not f.get('url'):
|
||||||
f['url'] = initialization_url
|
f['url'] = initialization_url
|
||||||
f['fragments'].append({location_key(initialization_url): initialization_url})
|
f['fragments'].append({location_key(initialization_url): initialization_url})
|
||||||
|
elif 'initialization_url_range' in representation_ms_info:
|
||||||
|
# no Initialization URL but range (accompanied by no Segment URLs but mediaRange above)
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/issues/27575
|
||||||
|
f['fragments'].append({
|
||||||
|
location_key(base_url): base_url,
|
||||||
|
'range': representation_ms_info['initialization_url_range'],
|
||||||
|
})
|
||||||
f['fragments'].extend(representation_ms_info['fragments'])
|
f['fragments'].extend(representation_ms_info['fragments'])
|
||||||
|
if not period_duration:
|
||||||
|
period_duration = sum(traverse_obj(representation_ms_info, (
|
||||||
|
'fragments', Ellipsis, 'duration', T(float_or_none))))
|
||||||
else:
|
else:
|
||||||
# Assuming direct URL to unfragmented media.
|
# Assuming direct URL to unfragmented media.
|
||||||
f['url'] = base_url
|
f['url'] = representation_ms_info['base_url']
|
||||||
|
if content_type in ('video', 'audio', 'image/jpeg'):
|
||||||
|
f['manifest_stream_number'] = stream_numbers[f['url']]
|
||||||
|
stream_numbers[f['url']] += 1
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
else:
|
elif content_type == 'text':
|
||||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
subtitles.setdefault(lang or 'und', []).append(f)
|
||||||
return formats
|
return formats, subtitles
|
||||||
|
|
||||||
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||||
res = self._download_xml_handle(
|
res = self._download_xml_handle(
|
||||||
@ -2495,7 +2804,8 @@ class InfoExtractor(object):
|
|||||||
return f
|
return f
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _media_formats(src, cur_media_type, type_info={}):
|
def _media_formats(src, cur_media_type, type_info=None):
|
||||||
|
type_info = type_info or {}
|
||||||
full_url = absolute_url(src)
|
full_url = absolute_url(src)
|
||||||
ext = type_info.get('ext') or determine_ext(full_url)
|
ext = type_info.get('ext') or determine_ext(full_url)
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
@ -2513,6 +2823,7 @@ class InfoExtractor(object):
|
|||||||
formats = [{
|
formats = [{
|
||||||
'url': full_url,
|
'url': full_url,
|
||||||
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
'vcodec': 'none' if cur_media_type == 'audio' else None,
|
||||||
|
'ext': ext,
|
||||||
}]
|
}]
|
||||||
return is_plain_url, formats
|
return is_plain_url, formats
|
||||||
|
|
||||||
@ -2521,7 +2832,7 @@ class InfoExtractor(object):
|
|||||||
# so we wll include them right here (see
|
# so we wll include them right here (see
|
||||||
# https://www.ampproject.org/docs/reference/components/amp-video)
|
# https://www.ampproject.org/docs/reference/components/amp-video)
|
||||||
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
||||||
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
|
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video(?:-js)?|audio)'
|
||||||
media_tags = [(media_tag, media_tag_name, media_type, '')
|
media_tags = [(media_tag, media_tag_name, media_type, '')
|
||||||
for media_tag, media_tag_name, media_type
|
for media_tag, media_tag_name, media_type
|
||||||
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
||||||
@ -2539,7 +2850,8 @@ class InfoExtractor(object):
|
|||||||
media_attributes = extract_attributes(media_tag)
|
media_attributes = extract_attributes(media_tag)
|
||||||
src = strip_or_none(media_attributes.get('src'))
|
src = strip_or_none(media_attributes.get('src'))
|
||||||
if src:
|
if src:
|
||||||
_, formats = _media_formats(src, media_type)
|
f = parse_content_type(media_attributes.get('type'))
|
||||||
|
_, formats = _media_formats(src, media_type, f)
|
||||||
media_info['formats'].extend(formats)
|
media_info['formats'].extend(formats)
|
||||||
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
|
||||||
if media_content:
|
if media_content:
|
||||||
@ -2713,25 +3025,21 @@ class InfoExtractor(object):
|
|||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||||
mobj = re.search(
|
return self._search_json(
|
||||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
|
||||||
webpage)
|
webpage, 'JWPlayer data', video_id,
|
||||||
if mobj:
|
# must be a {...} or sequence, ending
|
||||||
try:
|
contains_pattern=r'\{[\s\S]*}(?(load)(?:\s*,\s*\{[\s\S]*})*)', end_pattern=r'(?(load)\]|\))',
|
||||||
jwplayer_data = self._parse_json(mobj.group('options'),
|
transform_source=transform_source, default=None)
|
||||||
video_id=video_id,
|
|
||||||
transform_source=transform_source)
|
|
||||||
except ExtractorError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
if isinstance(jwplayer_data, dict):
|
|
||||||
return jwplayer_data
|
|
||||||
|
|
||||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||||
jwplayer_data = self._find_jwplayer_data(
|
# allow passing `transform_source` through to _find_jwplayer_data()
|
||||||
webpage, video_id, transform_source=js_to_json)
|
transform_source = kwargs.pop('transform_source', None)
|
||||||
return self._parse_jwplayer_data(
|
kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
|
||||||
jwplayer_data, video_id, *args, **kwargs)
|
|
||||||
|
jwplayer_data = self._find_jwplayer_data(webpage, video_id, **kwfind)
|
||||||
|
|
||||||
|
return self._parse_jwplayer_data(jwplayer_data, video_id, *args, **kwargs)
|
||||||
|
|
||||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||||
@ -2765,16 +3073,8 @@ class InfoExtractor(object):
|
|||||||
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
tracks = video_data.get('tracks')
|
for track in traverse_obj(video_data, (
|
||||||
if tracks and isinstance(tracks, list):
|
'tracks', lambda _, t: t.get('kind').lower() in ('captions', 'subtitles'))):
|
||||||
for track in tracks:
|
|
||||||
if not isinstance(track, dict):
|
|
||||||
continue
|
|
||||||
track_kind = track.get('kind')
|
|
||||||
if not track_kind or not isinstance(track_kind, compat_str):
|
|
||||||
continue
|
|
||||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
|
||||||
continue
|
|
||||||
track_url = urljoin(base_url, track.get('file'))
|
track_url = urljoin(base_url, track.get('file'))
|
||||||
if not track_url:
|
if not track_url:
|
||||||
continue
|
continue
|
||||||
@ -2999,12 +3299,16 @@ class InfoExtractor(object):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
|
def _merge_subtitles(cls, subtitle_dict1, *subtitle_dicts, **kwargs):
|
||||||
""" Merge two subtitle dictionaries, language by language. """
|
""" Merge subtitle dictionaries, language by language. """
|
||||||
ret = dict(subtitle_dict1)
|
|
||||||
for lang in subtitle_dict2:
|
# ..., * , target=None
|
||||||
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
|
target = kwargs.get('target') or dict(subtitle_dict1)
|
||||||
return ret
|
|
||||||
|
for subtitle_dict in subtitle_dicts:
|
||||||
|
for lang in subtitle_dict:
|
||||||
|
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subtitle_dict[lang])
|
||||||
|
return target
|
||||||
|
|
||||||
def extract_automatic_captions(self, *args, **kwargs):
|
def extract_automatic_captions(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writeautomaticsub', False)
|
if (self._downloader.params.get('writeautomaticsub', False)
|
||||||
@ -3037,6 +3341,29 @@ class InfoExtractor(object):
|
|||||||
def _generic_title(self, url):
|
def _generic_title(self, url):
|
||||||
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
|
||||||
|
|
||||||
|
def _yes_playlist(self, playlist_id, video_id, *args, **kwargs):
|
||||||
|
# smuggled_data=None, *, playlist_label='playlist', video_label='video'
|
||||||
|
smuggled_data = args[0] if len(args) == 1 else kwargs.get('smuggled_data')
|
||||||
|
playlist_label = kwargs.get('playlist_label', 'playlist')
|
||||||
|
video_label = kwargs.get('video_label', 'video')
|
||||||
|
|
||||||
|
if not playlist_id or not video_id:
|
||||||
|
return not video_id
|
||||||
|
|
||||||
|
no_playlist = (smuggled_data or {}).get('force_noplaylist')
|
||||||
|
if no_playlist is not None:
|
||||||
|
return not no_playlist
|
||||||
|
|
||||||
|
video_id = '' if video_id is True else ' ' + video_id
|
||||||
|
noplaylist = self.get_param('noplaylist')
|
||||||
|
self.to_screen(
|
||||||
|
'Downloading just the {0}{1} because of --no-playlist'.format(video_label, video_id)
|
||||||
|
if noplaylist else
|
||||||
|
'Downloading {0}{1} - add --no-playlist to download just the {2}{3}'.format(
|
||||||
|
playlist_label, '' if playlist_id is True else ' ' + playlist_id,
|
||||||
|
video_label, video_id))
|
||||||
|
return not noplaylist
|
||||||
|
|
||||||
|
|
||||||
class SearchInfoExtractor(InfoExtractor):
|
class SearchInfoExtractor(InfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
204
youtube_dl/extractor/dlf.py
Normal file
204
youtube_dl/extractor/dlf.py
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
variadic,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DLFBaseIE(InfoExtractor):
|
||||||
|
_VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
|
||||||
|
_BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
|
||||||
|
|
||||||
|
def _parse_button_attrs(self, button, audio_id=None):
|
||||||
|
attrs = extract_attributes(button)
|
||||||
|
audio_id = audio_id or attrs['data-audio-diraid']
|
||||||
|
|
||||||
|
url = traverse_obj(
|
||||||
|
attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
|
||||||
|
'data-audio-src', expected_type=url_or_none)
|
||||||
|
ext = determine_ext(url)
|
||||||
|
formats = (self._extract_m3u8_formats(url, audio_id, fatal=False)
|
||||||
|
if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
def traverse_attrs(path):
|
||||||
|
path = list(variadic(path))
|
||||||
|
t = path.pop() if callable(path[-1]) else None
|
||||||
|
return traverse_obj(attrs, path, expected_type=t, get_all=False)
|
||||||
|
|
||||||
|
def txt_or_none(v, default=None):
|
||||||
|
return default if v is None else (compat_str(v).strip() or default)
|
||||||
|
|
||||||
|
return merge_dicts(*reversed([{
|
||||||
|
'id': audio_id,
|
||||||
|
# 'extractor_key': DLFIE.ie_key(),
|
||||||
|
# 'extractor': DLFIE.IE_NAME,
|
||||||
|
'formats': formats,
|
||||||
|
}, dict((k, traverse_attrs(v)) for k, v in {
|
||||||
|
'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), txt_or_none),
|
||||||
|
'duration': (('data-audioduration', 'data-audio-duration'), int_or_none),
|
||||||
|
'thumbnail': ('data-audioimage', url_or_none),
|
||||||
|
'uploader': 'data-audio-producer',
|
||||||
|
'series': 'data-audio-series',
|
||||||
|
'channel': 'data-audio-origin-site-name',
|
||||||
|
'webpage_url': ('data-audio-download-tracking-path', url_or_none),
|
||||||
|
}.items())]))
|
||||||
|
|
||||||
|
|
||||||
|
class DLFIE(DLFBaseIE):
|
||||||
|
IE_NAME = 'dlf'
|
||||||
|
_VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
|
||||||
|
_TESTS = [
|
||||||
|
# Audio as an HLS stream
|
||||||
|
{
|
||||||
|
'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '03a3eb19',
|
||||||
|
'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'duration': 3298,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'On Stage',
|
||||||
|
'channel': 'deutschlandfunk'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8'
|
||||||
|
},
|
||||||
|
'skip': 'This webpage no longer exists'
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd9cc1856',
|
||||||
|
'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 291,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'Kommentare und Themen der Woche',
|
||||||
|
'channel': 'deutschlandfunk'
|
||||||
|
}
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
audio_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, audio_id)
|
||||||
|
|
||||||
|
return self._parse_button_attrs(
|
||||||
|
self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
|
||||||
|
|
||||||
|
|
||||||
|
class DLFCorpusIE(DLFBaseIE):
|
||||||
|
IE_NAME = 'dlf:corpus'
|
||||||
|
IE_DESC = 'DLF Multi-feed Archives'
|
||||||
|
_VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
|
||||||
|
_TESTS = [
|
||||||
|
# Recorded news broadcast with referrals to related broadcasts
|
||||||
|
{
|
||||||
|
'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'fechten-russland-belarus-ukraine-protest-100',
|
||||||
|
'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
|
||||||
|
'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1fc5d64a',
|
||||||
|
'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 252,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'Sport',
|
||||||
|
'channel': 'deutschlandfunk'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2ada145f',
|
||||||
|
'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 336,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'Deutschlandfunk Nova',
|
||||||
|
'channel': 'deutschlandfunk-nova'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5e55e8c9',
|
||||||
|
'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 187,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'Sport am Samstag',
|
||||||
|
'channel': 'deutschlandfunk'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '47e1a096',
|
||||||
|
'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 602,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'Sport am Samstag',
|
||||||
|
'channel': 'deutschlandfunk'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5e55e8c9',
|
||||||
|
'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 187,
|
||||||
|
'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
|
||||||
|
'uploader': 'Deutschlandfunk',
|
||||||
|
'series': 'Sport am Samstag',
|
||||||
|
'channel': 'deutschlandfunk'
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
},
|
||||||
|
# Podcast feed with tag buttons, playlist count fluctuates
|
||||||
|
{
|
||||||
|
'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'kommentare-und-themen-der-woche-100',
|
||||||
|
'title': 'Meinung - Kommentare und Themen der Woche',
|
||||||
|
'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
},
|
||||||
|
# Podcast feed with no description
|
||||||
|
{
|
||||||
|
'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'podcast-tolle-idee-100',
|
||||||
|
'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 11,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
|
||||||
|
playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
|
||||||
|
self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage, default=None))
|
101
youtube_dl/extractor/epidemicsound.py
Normal file
101
youtube_dl/extractor/epidemicsound.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EpidemicSoundIE(InfoExtractor):
    """Extractor for single tracks at epidemicsound.com/track/<id>.

    All metadata and stream URLs are read from the site's JSON endpoint
    (/json/track/<id>); no HTML page is downloaded.
    """
    _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
        'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
        'info_dict': {
            'id': '45014',
            'display_id': 'yFfQVRpSPz',
            'ext': 'mp3',
            'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
            'title': 'Door Knock Door 1',
            'duration': 1,
            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
            'timestamp': 1415320353,
            'upload_date': '20141107',
            'age_limit': None,
            # check that the "best" format was found, since test file MD5 doesn't
            # distinguish the formats
            'format': 'full',
        },
    }, {
        'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
        'md5': 'c82b745890f9baf18dc2f8d568ee3830',
        'info_dict': {
            'id': '148700',
            'display_id': 'mj8GTTwsZd',
            'ext': 'mp3',
            'tags': ['liquid drum n bass', 'energetic'],
            'title': 'Noplace',
            'duration': 237,
            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
            'timestamp': 1694426482,
            'release_timestamp': 1700535606,
            'upload_date': '20230911',
            'age_limit': None,
            'format': 'full',
        },
    }]

    def _real_extract(self, url):
        """Download the track JSON and map it to an info dict.

        The URL slug becomes 'display_id'; 'id' is taken from the JSON.
        Each entry of the JSON 'stems' mapping becomes one format.
        """
        video_id = self._match_id(url)
        json_data = self._download_json('https://www.epidemicsound.com/json/track/' + video_id, video_id)

        def fmt_or_none(f):
            """Complete one stem's format dict in place; return None to drop it."""
            # 'format' (the stems key) and 'format_id' (stemType) back each other up
            if not f.get('format'):
                f['format'] = f.get('format_id')
            elif not f.get('format_id'):
                f['format_id'] = f['format']
            # .get() rather than subscripting: the 'url' key may be absent
            # when lqMp3Url is missing or is not a valid URL
            if not (f.get('url') and f.get('format')):
                return
            if f.get('format_note'):
                f['format_note'] = 'track ID ' + f['format_note']
            # the 'full' stem is preferred over all the others
            f['preference'] = -1 if f['format'] == 'full' else -2
            return f

        formats = traverse_obj(json_data, (
            'stems', T(dict.items), Ellipsis, {
                'format': (0, T(txt_or_none)),
                'format_note': (1, 's3TrackId', T(txt_or_none)),
                'format_id': (1, 'stemType', T(txt_or_none)),
                'url': (1, 'lqMp3Url', T(url_or_none)),
            }, T(fmt_or_none)))

        self._sort_formats(formats)

        # Single-valued fields: get_all=False keeps the first match per key.
        # 'tags' is deliberately not collected here: it is multi-valued and
        # is gathered as a list below.
        # NOTE(review): traverse_obj presumably returns None when no key
        # matches, hence the `or {}` fallbacks - confirm against utils.
        info = traverse_obj(json_data, {
            'id': ('id', T(txt_or_none)),
            'title': ('title', T(txt_or_none)),
            'duration': ('length', T(float_or_none)),
            'timestamp': ('added', T(unified_timestamp)),
            'thumbnail': (('imageUrl', 'cover'), T(url_or_none)),
            'age_limit': ('isExplicit', T(lambda b: 18 if b else None)),
            'release_timestamp': ('releaseDate', T(unified_timestamp)),
        }, get_all=False) or {}

        # Multi-valued fields: collect every match as a list
        info.update(traverse_obj(json_data, {
            'categories': ('genres', Ellipsis, 'tag', T(txt_or_none)),
            'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
        }) or {})

        info.update({
            'display_id': video_id,
            'formats': formats,
        })

        return info
|
@ -159,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
|
|||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
|
from .caffeine import CaffeineTVIE
|
||||||
from .callin import CallinIE
|
from .callin import CallinIE
|
||||||
from .camdemy import (
|
from .camdemy import (
|
||||||
CamdemyIE,
|
CamdemyIE,
|
||||||
@ -226,6 +227,7 @@ from .ciscolive import (
|
|||||||
CiscoLiveSearchIE,
|
CiscoLiveSearchIE,
|
||||||
)
|
)
|
||||||
from .cjsw import CJSWIE
|
from .cjsw import CJSWIE
|
||||||
|
from .clipchamp import ClipchampIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clippit import ClippitIE
|
from .clippit import ClippitIE
|
||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
@ -295,6 +297,10 @@ from .dbtv import DBTVIE
|
|||||||
from .dctp import DctpTvIE
|
from .dctp import DctpTvIE
|
||||||
from .deezer import DeezerPlaylistIE
|
from .deezer import DeezerPlaylistIE
|
||||||
from .democracynow import DemocracynowIE
|
from .democracynow import DemocracynowIE
|
||||||
|
from .dlf import (
|
||||||
|
DLFCorpusIE,
|
||||||
|
DLFIE,
|
||||||
|
)
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
from .dhm import DHMIE
|
from .dhm import DHMIE
|
||||||
from .digg import DiggIE
|
from .digg import DiggIE
|
||||||
@ -352,6 +358,7 @@ from .ellentube import (
|
|||||||
from .elpais import ElPaisIE
|
from .elpais import ElPaisIE
|
||||||
from .embedly import EmbedlyIE
|
from .embedly import EmbedlyIE
|
||||||
from .engadget import EngadgetIE
|
from .engadget import EngadgetIE
|
||||||
|
from .epidemicsound import EpidemicSoundIE
|
||||||
from .eporner import EpornerIE
|
from .eporner import EpornerIE
|
||||||
from .eroprofile import EroProfileIE
|
from .eroprofile import EroProfileIE
|
||||||
from .escapist import EscapistIE
|
from .escapist import EscapistIE
|
||||||
@ -376,7 +383,6 @@ from .fc2 import (
|
|||||||
FC2EmbedIE,
|
FC2EmbedIE,
|
||||||
)
|
)
|
||||||
from .fczenit import FczenitIE
|
from .fczenit import FczenitIE
|
||||||
from .filemoon import FileMoonIE
|
|
||||||
from .fifa import FifaIE
|
from .fifa import FifaIE
|
||||||
from .filmon import (
|
from .filmon import (
|
||||||
FilmOnIE,
|
FilmOnIE,
|
||||||
@ -437,6 +443,7 @@ from .gamespot import GameSpotIE
|
|||||||
from .gamestar import GameStarIE
|
from .gamestar import GameStarIE
|
||||||
from .gaskrank import GaskrankIE
|
from .gaskrank import GaskrankIE
|
||||||
from .gazeta import GazetaIE
|
from .gazeta import GazetaIE
|
||||||
|
from .gbnews import GBNewsIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
from .gedidigital import GediDigitalIE
|
from .gedidigital import GediDigitalIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
@ -444,6 +451,13 @@ from .gfycat import GfycatIE
|
|||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
from .giga import GigaIE
|
from .giga import GigaIE
|
||||||
from .glide import GlideIE
|
from .glide import GlideIE
|
||||||
|
from .globalplayer import (
|
||||||
|
GlobalPlayerLiveIE,
|
||||||
|
GlobalPlayerLivePlaylistIE,
|
||||||
|
GlobalPlayerAudioIE,
|
||||||
|
GlobalPlayerAudioEpisodeIE,
|
||||||
|
GlobalPlayerVideoIE
|
||||||
|
)
|
||||||
from .globo import (
|
from .globo import (
|
||||||
GloboIE,
|
GloboIE,
|
||||||
GloboArticleIE,
|
GloboArticleIE,
|
||||||
@ -884,21 +898,13 @@ from .ooyala import (
|
|||||||
)
|
)
|
||||||
from .ora import OraTVIE
|
from .ora import OraTVIE
|
||||||
from .orf import (
|
from .orf import (
|
||||||
ORFTVthekIE,
|
ORFONIE,
|
||||||
ORFFM4IE,
|
ORFONLiveIE,
|
||||||
ORFFM4StoryIE,
|
ORFFM4StoryIE,
|
||||||
ORFOE1IE,
|
|
||||||
ORFOE3IE,
|
|
||||||
ORFNOEIE,
|
|
||||||
ORFWIEIE,
|
|
||||||
ORFBGLIE,
|
|
||||||
ORFOOEIE,
|
|
||||||
ORFSTMIE,
|
|
||||||
ORFKTNIE,
|
|
||||||
ORFSBGIE,
|
|
||||||
ORFTIRIE,
|
|
||||||
ORFVBGIE,
|
|
||||||
ORFIPTVIE,
|
ORFIPTVIE,
|
||||||
|
ORFPodcastIE,
|
||||||
|
ORFRadioIE,
|
||||||
|
ORFRadioCollectionIE,
|
||||||
)
|
)
|
||||||
from .outsidetv import OutsideTVIE
|
from .outsidetv import OutsideTVIE
|
||||||
from .packtpub import (
|
from .packtpub import (
|
||||||
@ -975,6 +981,10 @@ from .pornhub import (
|
|||||||
from .pornotube import PornotubeIE
|
from .pornotube import PornotubeIE
|
||||||
from .pornovoisines import PornoVoisinesIE
|
from .pornovoisines import PornoVoisinesIE
|
||||||
from .pornoxo import PornoXOIE
|
from .pornoxo import PornoXOIE
|
||||||
|
from .pr0gramm import (
|
||||||
|
Pr0grammIE,
|
||||||
|
Pr0grammStaticIE,
|
||||||
|
)
|
||||||
from .puhutv import (
|
from .puhutv import (
|
||||||
PuhuTVIE,
|
PuhuTVIE,
|
||||||
PuhuTVSerieIE,
|
PuhuTVSerieIE,
|
||||||
@ -1071,6 +1081,10 @@ from .rutube import (
|
|||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .ruutu import RuutuIE
|
from .ruutu import RuutuIE
|
||||||
from .ruv import RuvIE
|
from .ruv import RuvIE
|
||||||
|
from .s4c import (
|
||||||
|
S4CIE,
|
||||||
|
S4CSeriesIE,
|
||||||
|
)
|
||||||
from .safari import (
|
from .safari import (
|
||||||
SafariIE,
|
SafariIE,
|
||||||
SafariApiIE,
|
SafariApiIE,
|
||||||
@ -1565,6 +1579,7 @@ from .weibo import (
|
|||||||
WeiboMobileIE
|
WeiboMobileIE
|
||||||
)
|
)
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
|
from .whyp import WhypIE
|
||||||
from .wistia import (
|
from .wistia import (
|
||||||
WistiaIE,
|
WistiaIE,
|
||||||
WistiaPlaylistIE,
|
WistiaPlaylistIE,
|
||||||
@ -1630,7 +1645,15 @@ from .younow import (
|
|||||||
YouNowChannelIE,
|
YouNowChannelIE,
|
||||||
YouNowMomentIE,
|
YouNowMomentIE,
|
||||||
)
|
)
|
||||||
from .youporn import YouPornIE
|
from .youporn import (
|
||||||
|
YouPornIE,
|
||||||
|
YouPornCategoryIE,
|
||||||
|
YouPornChannelIE,
|
||||||
|
YouPornCollectionIE,
|
||||||
|
YouPornStarIE,
|
||||||
|
YouPornTagIE,
|
||||||
|
YouPornVideosIE,
|
||||||
|
)
|
||||||
from .yourporn import YourPornIE
|
from .yourporn import YourPornIE
|
||||||
from .yourupload import YourUploadIE
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
@ -1678,7 +1701,3 @@ from .zingmp3 import (
|
|||||||
)
|
)
|
||||||
from .zoom import ZoomIE
|
from .zoom import ZoomIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
from .pr0gramm import (
|
|
||||||
Pr0grammIE,
|
|
||||||
Pr0grammStaticIE,
|
|
||||||
)
|
|
||||||
|
@ -1,43 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
decode_packed_codes,
|
|
||||||
js_to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FileMoonIE(InfoExtractor):
    """Extractor for filemoon.sx pages whose player setup is in packed JS."""
    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
    _TEST = {
        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
        'md5': '5a713742f57ac4aef29b74733e8dda01',
        'info_dict': {
            'id': 'dw40rxrzruqz',
            'title': 'dw40rxrzruqz',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        """Unpack the last eval(...) script and read the jwplayer sources from it."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # the last eval block before a </script> is the one that gets unpacked
        eval_blocks = re.findall(r'(?s)(eval.*?)</script>', page)
        player_js = decode_packed_codes(eval_blocks[-1])

        # the sources array is a JS literal, so it is converted with js_to_json
        sources_js = self._search_regex(
            r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', player_js, 'jwplayer sources')
        sources = self._parse_json(
            sources_js, video_id, transform_source=js_to_json)

        result = {'id': video_id}
        result['formats'] = self._parse_jwplayer_formats(sources, video_id)
        result['title'] = self._generic_title(url) or video_id
        return result
|
|
139
youtube_dl/extractor/gbnews.py
Normal file
139
youtube_dl/extractor/gbnews.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GBNewsIE(InfoExtractor):
    """Extractor for GB News pages that embed a Simplestream player.

    The page's <div class="simplestream" ...> tag carries the IDs and
    tokens used to query the Simplestream API for an HLS stream URL.
    """
    IE_DESC = 'GB News clips, features and live stream'

    # \w+ is normally shows or news, but apparently any word redirects to the correct URL
    _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'

    _PLATFORM = 'safari'
    _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
    _TESTS = [{
        'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889',
        'info_dict': {
            'id': '106889',
            'ext': 'mp4',
            'title': "Andrew Neil's message to companies choosing to boycott GB News",
            'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351',
        },
        'skip': '404 not found',
    }, {
        'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
        'info_dict': {
            'id': '52264136',
            'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
            'ext': 'mp4',
            'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
            'description': 'The post was criticised by former employers of the broadcaster',
        },
    }, {
        'url': 'https://www.gbnews.uk/watchlive',
        'info_dict': {
            'id': '1069',
            'display_id': 'watchlive',
            'ext': 'mp4',
            'title': 'GB News Live',
            'is_live': True,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Resolve a GB News page to its Simplestream HLS formats.

        Raises ExtractorError when the Simplestream ID, the video ID
        (data-uvid), the API host or the stream URL cannot be found.
        """
        # rstrip: a trailing slash would otherwise leave an empty last segment
        display_id = self._match_id(url).rstrip('/').split('/')[-1]

        webpage = self._download_webpage(url, display_id)
        # extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341
        #
        # <div id="video-106908"
        #     class="simplestream"
        #     data-id="GB001"
        #     data-type="vod"
        #     data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs"
        #     data-token="f9c317c727dc07f515b20036c8ef14a6"
        #     data-expiry="1624300052"
        #     data-uvid="37900558"
        #     data-poster="https://thumbnails.simplestreamcdn.com/gbnews/ondemand/37900558.jpg?width=700&"
        #     data-npaw="false"
        #     data-env="production">

        # exception if no match
        video_data = self._search_regex(
            r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)',
            webpage, 'video data')

        video_data = extract_attributes(video_data)
        ss_id = video_data.get('data-id')
        if not ss_id:
            raise ExtractorError('Simplestream ID not found')

        # non-fatal download: a failure is reported below as a missing API host
        json_data = self._download_json(
            self._SSMP_URL, display_id,
            note='Downloading Simplestream JSON metadata',
            errnote='Unable to download Simplestream JSON metadata',
            query={
                'id': ss_id,
                'env': video_data.get('data-env', 'production'),
            }, fatal=False)

        meta_url = traverse_obj(json_data, ('response', 'api_hostname'))
        if not meta_url:
            raise ExtractorError('No API host found')

        # report a missing video ID like the other missing fields,
        # instead of failing with a bare KeyError
        uvid = video_data.get('data-uvid')
        if not uvid:
            raise ExtractorError('Simplestream video ID not found')

        dtype = video_data.get('data-type')
        # the API path uses 'show' where the page says 'vod'
        stream_data = self._download_json(
            '%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid),
            uvid,
            query={
                'key': video_data.get('data-key'),
                'platform': self._PLATFORM,
            },
            headers={
                'Token': video_data.get('data-token'),
                'Token-Expiry': video_data.get('data-expiry'),
                'Uvid': uvid,
            }, fatal=False)

        stream_url = traverse_obj(stream_data, (
            'response', 'stream', T(url_or_none)))
        if not stream_url:
            raise ExtractorError('No stream data/URL')

        # now known to be a dict
        stream_data = stream_data['response']
        drm = stream_data.get('drm')
        if drm:
            self.report_drm(uvid)

        formats = self._extract_m3u8_formats(
            stream_url, uvid, ext='mp4', entry_protocol='m3u8_native',
            fatal=False)
        # exception if no formats
        self._sort_formats(formats)

        return {
            'id': uvid,
            'display_id': display_id,
            # title fallbacks: API title, then og:title, then the URL slug
            'title': (traverse_obj(stream_data, ('title', T(txt_or_none)))
                      or self._og_search_title(webpage, default=None)
                      or display_id.replace('-', ' ').capitalize()),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none)))
                          or self._og_search_thumbnail(webpage)),
            'formats': formats,
            'is_live': (dtype == 'live') or None,
        }
|
273
youtube_dl/extractor/globalplayer.py
Normal file
273
youtube_dl/extractor/globalplayer.py
Normal file
@ -0,0 +1,273 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
join_nonempty,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
str_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
|
urlhandle_detect_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerBaseIE(InfoExtractor):
    """Helpers shared by the globalplayer.com extractors."""

    def _get_page_props(self, url, video_id):
        """Download the page and return props.pageProps from its Next.js data."""
        page = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(page, video_id)
        return nextjs_data['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Detect the stream's file extension from the response to a request."""
        # Server rejects HEAD requests
        handle = self._request_webpage(
            url, video_id, note='Determining source extension')
        return urlhandle_detect_ext(handle)

    @staticmethod
    def _clean_desc(x):
        """Strip HTML from x and turn non-breaking spaces into plain spaces."""
        cleaned = clean_html(x)
        if not cleaned:
            return cleaned
        return cleaned.replace('\xa0', ' ')

    def _extract_audio(self, episode, series):
        """Merge series-level and episode-level fields into one audio entry."""
        defaults = {
            'vcodec': 'none',
        }
        series_fields = traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        # get_all=False: first hit only, e.g. pubDate before startDate
        episode_fields = traverse_obj(episode, {
            'id': 'id',
            'description': ('description', T(self._clean_desc)),
            'duration': ('duration', T(parse_duration)),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
            'title': 'title',
        }, get_all=False)
        return merge_dicts(defaults, series_fields, episode_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    """Extractor for live radio stations at globalplayer.com/live/<brand>/<region>."""
    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
        'info_dict': {
            'id': '2mx1E',
            'ext': 'aac',
            'display_id': 'smoothchill-uk',
            'title': 're:^Smooth Chill.+$',
            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
            'description': 'Music To Chill To',
            # 'live_status': 'is_live',
            'is_live': True,
        },
    }, {
        # national station
        'url': 'https://www.globalplayer.com/live/heart/uk/',
        'info_dict': {
            'id': '2mwx4',
            'ext': 'aac',
            'description': 'turn up the feel good!',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            # 'live_status': 'is_live',
            'is_live': True,
            'title': 're:^Heart UK.+$',
            'display_id': 'heart-uk',
        },
    }, {
        # regional variation
        'url': 'https://www.globalplayer.com/live/heart/london/',
        'info_dict': {
            'id': 'AMqg',
            'ext': 'aac',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'title': 're:^Heart London.+$',
            # 'live_status': 'is_live',
            'is_live': True,
            'display_id': 'heart-london',
            'description': 'turn up the feel good!',
        },
    }]

    def _real_extract(self, url):
        """Return the live stream described by the page's 'station' props."""
        video_id = self._match_id(url)
        station = self._get_page_props(url, video_id)['station']
        stream_url = station['streamUrl']

        stream_fields = {
            'id': station['id'],
            # '<brandSlug>-<slug>' when available, else the legacy prefix
            'display_id': (
                join_nonempty('brandSlug', 'slug', from_dict=station)
                or station.get('legacyStationPrefix')),
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        # first of name/brandName that is a string
        station_name = traverse_obj(
            station, (('name', 'brandName'), T(str_or_none)),
            get_all=False)
        title_fields = {
            'title': self._live_title(station_name),
        }
        station_fields = traverse_obj(station, {
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        })
        return merge_dicts(stream_fields, title_fields, station_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    """Extractor for "live playlist" streams at globalplayer.com/playlists/<id>."""
    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [{
        # "live playlist"
        'url': 'https://www.globalplayer.com/playlists/8bLk/',
        'info_dict': {
            'id': '8bLk',
            'ext': 'aac',
            # 'live_status': 'is_live',
            'is_live': True,
            'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
            'title': 're:Classic FM Hall of Fame.+$'
        },
    }]

    def _real_extract(self, url):
        """Return the live stream described by the page's 'playlistData' props."""
        video_id = self._match_id(url)
        playlist_data = self._get_page_props(url, video_id)['playlistData']
        stream_url = playlist_data['streamUrl']

        stream_fields = {
            'id': video_id,
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        playlist_fields = traverse_obj(playlist_data, {
            'title': 'title',
            'description': ('description', T(self._clean_desc)),
            'thumbnail': 'image',
        })
        return merge_dicts(stream_fields, playlist_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    """Extractor for whole podcast or radio catch-up series (as playlists)."""
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the series' episodes that have an id and a stream URL."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # the 'podcast' group only matches /podcasts/ URLs; otherwise it is catch-up
        series = props['podcastInfo' if podcast else 'catchupInfo']

        # episodes lacking either an id or a stream URL are filtered out
        episodes = traverse_obj(
            series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))
        playlist_fields = {
            '_type': 'playlist',
            'id': video_id,
            'entries': [self._extract_audio(episode, series) for episode in episodes],
            'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
        }
        series_fields = traverse_obj(series, {
            'description': ('description', T(self._clean_desc)),
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        return merge_dicts(playlist_fields, series_fields, rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    """Extractor for a single podcast or radio catch-up episode."""
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'only_matching': True,
        # expired: refresh the details with a current show for a full test
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Return one audio entry built from the page's episode props."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        episode = props['podcastEpisode' if podcast else 'catchupEpisode']

        # the parent series is under 'podcast' or 'show'; fall back to no series data
        series = traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}
        return self._extract_audio(episode, series)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    """Extractor for videos at globalplayer.com/videos/<id>."""
    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Return the video described by the page's 'videoData' props."""
        video_id = self._match_id(url)
        video_data = self._get_page_props(url, video_id)['videoData']

        id_fields = {
            'id': video_id,
        }
        video_fields = traverse_obj(video_data, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', T(unified_strdate)),
            'description': 'description',
        })
        return merge_dicts(id_fields, video_fields, rev=True)
|
@ -1,101 +1,267 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
ExtractorError,
|
parse_iso8601,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ImgurIE(InfoExtractor):
|
class ImgurBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
|
# hard-coded value, as also used by ArchiveTeam
|
||||||
|
_CLIENT_ID = '546c25a59c58ad7'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _imgur_result(cls, item_id):
|
||||||
|
return cls.url_result('imgur:%s' % item_id, ImgurIE.ie_key(), item_id)
|
||||||
|
|
||||||
|
def _call_api(self, endpoint, video_id, **kwargs):
|
||||||
|
return self._download_json(
|
||||||
|
'https://api.imgur.com/post/v1/%s/%s?client_id=%s&include=media,account' % (endpoint, video_id, self._CLIENT_ID),
|
||||||
|
video_id, **kwargs)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_description(s):
|
||||||
|
if 'Discover the magic of the internet at Imgur' in s:
|
||||||
|
return None
|
||||||
|
return txt_or_none(s)
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurIE(ImgurBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
(?:
|
||||||
|
https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)|
|
||||||
|
imgur:
|
||||||
|
)(?P<id>[a-zA-Z0-9]+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
'url': 'https://imgur.com/A61SaA1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'A61SaA1',
|
'id': 'A61SaA1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
|
||||||
|
'timestamp': 1416446068,
|
||||||
|
'upload_date': '20141120',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/A61SaA1',
|
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# no title
|
# previously, no title
|
||||||
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
'url': 'https://i.imgur.com/jxBXAMC.gifv',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'jxBXAMC',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Fahaka puffer feeding',
|
||||||
|
'timestamp': 1533835503,
|
||||||
|
'upload_date': '20180809',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_twitter_formats(self, html, tw_id='twitter', **kwargs):
|
||||||
|
fatal = kwargs.pop('fatal', False)
|
||||||
|
tw_stream = self._html_search_meta('twitter:player:stream', html, fatal=fatal, **kwargs)
|
||||||
|
if not tw_stream:
|
||||||
|
return []
|
||||||
|
ext = mimetype2ext(self._html_search_meta(
|
||||||
|
'twitter:player:stream:content_type', html, default=None))
|
||||||
|
width, height = (int_or_none(self._html_search_meta('twitter:player:' + v, html, default=None))
|
||||||
|
for v in ('width', 'height'))
|
||||||
|
return [{
|
||||||
|
'format_id': tw_id,
|
||||||
|
'url': tw_stream,
|
||||||
|
'ext': ext or determine_ext(tw_stream),
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
data = self._call_api('media', video_id, fatal=False, expected_status=404)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
|
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id, fatal=not data) or ''
|
||||||
|
|
||||||
width = int_or_none(self._og_search_property(
|
if not traverse_obj(data, ('media', 0, (
|
||||||
'video:width', webpage, default=None))
|
('type', T(lambda t: t == 'video' or None)),
|
||||||
height = int_or_none(self._og_search_property(
|
('metadata', 'is_animated'))), get_all=False):
|
||||||
'video:height', webpage, default=None))
|
raise ExtractorError(
|
||||||
|
'%s is not a video or animated image' % video_id,
|
||||||
|
expected=True)
|
||||||
|
|
||||||
|
media_fmt = traverse_obj(data, ('media', 0, {
|
||||||
|
'url': ('url', T(url_or_none)),
|
||||||
|
'ext': 'ext',
|
||||||
|
'width': ('width', T(int_or_none)),
|
||||||
|
'height': ('height', T(int_or_none)),
|
||||||
|
'filesize': ('size', T(int_or_none)),
|
||||||
|
'acodec': ('metadata', 'has_sound', T(lambda b: None if b else 'none')),
|
||||||
|
}))
|
||||||
|
|
||||||
|
media_url = traverse_obj(media_fmt, 'url')
|
||||||
|
if media_url:
|
||||||
|
if not media_fmt.get('ext'):
|
||||||
|
media_fmt['ext'] = mimetype2ext(traverse_obj(
|
||||||
|
data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
|
||||||
|
if traverse_obj(data, ('media', 0, 'type')) == 'image':
|
||||||
|
media_fmt['acodec'] = 'none'
|
||||||
|
media_fmt.setdefault('preference', -10)
|
||||||
|
|
||||||
|
tw_formats = self._extract_twitter_formats(webpage)
|
||||||
|
if traverse_obj(tw_formats, (0, 'url')) == media_url:
|
||||||
|
tw_formats = []
|
||||||
|
else:
|
||||||
|
# maybe this isn't an animated image/video?
|
||||||
|
self._check_formats(tw_formats, video_id)
|
||||||
|
|
||||||
video_elements = self._search_regex(
|
video_elements = self._search_regex(
|
||||||
r'(?s)<div class="video-elements">(.*?)</div>',
|
r'(?s)<div class="video-elements">(.*?)</div>',
|
||||||
webpage, 'video elements', default=None)
|
webpage, 'video elements', default=None)
|
||||||
if not video_elements:
|
if not (video_elements or tw_formats or media_url):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'No sources found for video %s. Maybe an image?' % video_id,
|
'No sources found for video %s. Maybe a plain image?' % video_id,
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
formats = []
|
def mung_format(fmt, *extra):
|
||||||
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
|
fmt.update({
|
||||||
formats.append({
|
|
||||||
'format_id': m.group('type').partition('/')[2],
|
|
||||||
'url': self._proto_relative_url(m.group('src')),
|
|
||||||
'ext': mimetype2ext(m.group('type')),
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'User-Agent': 'youtube-dl (like wget)',
|
'User-Agent': 'youtube-dl (like wget)',
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
for d in extra:
|
||||||
|
fmt.update(d)
|
||||||
|
return fmt
|
||||||
|
|
||||||
|
if video_elements:
|
||||||
|
def og_get_size(media_type):
|
||||||
|
return dict((p, int_or_none(self._og_search_property(
|
||||||
|
':'.join((media_type, p)), webpage, default=None)))
|
||||||
|
for p in ('width', 'height'))
|
||||||
|
|
||||||
|
size = og_get_size('video')
|
||||||
|
if all(v is None for v in size.values()):
|
||||||
|
size = og_get_size('image')
|
||||||
|
|
||||||
|
formats = traverse_obj(
|
||||||
|
re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
|
||||||
|
(Ellipsis, {
|
||||||
|
'format_id': ('type', T(lambda s: s.partition('/')[2])),
|
||||||
|
'url': ('src', T(self._proto_relative_url)),
|
||||||
|
'ext': ('type', T(mimetype2ext)),
|
||||||
|
}, T(lambda f: mung_format(f, size))))
|
||||||
|
|
||||||
gif_json = self._search_regex(
|
gif_json = self._search_regex(
|
||||||
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
|
||||||
webpage, 'GIF code', fatal=False)
|
webpage, 'GIF code', fatal=False)
|
||||||
if gif_json:
|
MUST_BRANCH = (None, T(lambda _: None))
|
||||||
gifd = self._parse_json(
|
formats.extend(traverse_obj(gif_json, (
|
||||||
gif_json, video_id, transform_source=js_to_json)
|
T(lambda j: self._parse_json(
|
||||||
formats.append({
|
j, video_id, transform_source=js_to_json, fatal=False)), {
|
||||||
|
'url': ('gifUrl', T(self._proto_relative_url)),
|
||||||
|
'filesize': ('size', T(int_or_none)),
|
||||||
|
}, T(lambda f: mung_format(f, size, {
|
||||||
'format_id': 'gif',
|
'format_id': 'gif',
|
||||||
'preference': -10,
|
'preference': -10, # gifs are worse than videos
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'ext': 'gif',
|
'ext': 'gif',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'vcodec': 'gif',
|
'vcodec': 'gif',
|
||||||
'container': 'gif',
|
'container': 'gif',
|
||||||
'url': self._proto_relative_url(gifd['gifUrl']),
|
})), MUST_BRANCH)))
|
||||||
'filesize': gifd.get('size'),
|
else:
|
||||||
'http_headers': {
|
formats = []
|
||||||
'User-Agent': 'youtube-dl (like wget)',
|
|
||||||
},
|
# maybe add formats from JSON or page Twitter metadata
|
||||||
})
|
if not any((u == media_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
|
||||||
|
formats.append(mung_format(media_fmt))
|
||||||
|
tw_url = traverse_obj(tw_formats, (0, 'url'))
|
||||||
|
if not any((u == tw_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
|
||||||
|
formats.extend(mung_format(f) for f in tw_formats)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return merge_dicts(traverse_obj(data, {
|
||||||
|
'uploader_id': ('account_id', T(txt_or_none),
|
||||||
|
T(lambda a: a if int_or_none(a) != 0 else None)),
|
||||||
|
'uploader': ('account', 'username', T(txt_or_none)),
|
||||||
|
'uploader_url': ('account', 'avatar_url', T(url_or_none)),
|
||||||
|
'like_count': ('upvote_count', T(int_or_none)),
|
||||||
|
'dislike_count': ('downvote_count', T(int_or_none)),
|
||||||
|
'comment_count': ('comment_count', T(int_or_none)),
|
||||||
|
'age_limit': ('is_mature', T(lambda x: 18 if x else None)),
|
||||||
|
'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
|
||||||
|
'release_timestamp': ('created_at', T(parse_iso8601)),
|
||||||
|
}, get_all=False), traverse_obj(data, ('media', 0, 'metadata', {
|
||||||
|
'title': ('title', T(txt_or_none)),
|
||||||
|
'description': ('description', T(self.get_description)),
|
||||||
|
'duration': ('duration', T(float_or_none)),
|
||||||
|
'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
|
||||||
|
'release_timestamp': ('created_at', T(parse_iso8601)),
|
||||||
|
})), {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': self._og_search_title(webpage, default=video_id),
|
'title': self._og_search_title(webpage, default='Imgur video ' + video_id),
|
||||||
}
|
'description': self.get_description(self._og_search_description(webpage)),
|
||||||
|
'thumbnail': url_or_none(self._html_search_meta('thumbnailUrl', webpage, default=None)),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
class ImgurGalleryIE(InfoExtractor):
|
class ImgurGalleryBaseIE(ImgurBaseIE):
|
||||||
|
_GALLERY = True
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
gallery_id = self._match_id(url)
|
||||||
|
|
||||||
|
data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
|
||||||
|
|
||||||
|
info = traverse_obj(data, {
|
||||||
|
'title': ('title', T(txt_or_none)),
|
||||||
|
'description': ('description', T(self.get_description)),
|
||||||
|
})
|
||||||
|
|
||||||
|
if traverse_obj(data, 'is_album'):
|
||||||
|
|
||||||
|
def yield_media_ids():
|
||||||
|
for m_id in traverse_obj(data, (
|
||||||
|
'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
|
||||||
|
'id', T(txt_or_none))):
|
||||||
|
yield m_id
|
||||||
|
|
||||||
|
# if a gallery with exactly one video, apply album metadata to video
|
||||||
|
media_id = (
|
||||||
|
self._GALLERY
|
||||||
|
and traverse_obj(data, ('image_count', T(lambda c: c == 1)))
|
||||||
|
and next(yield_media_ids(), None))
|
||||||
|
|
||||||
|
if not media_id:
|
||||||
|
result = self.playlist_result(
|
||||||
|
map(self._imgur_result, yield_media_ids()), gallery_id)
|
||||||
|
result.update(info)
|
||||||
|
return result
|
||||||
|
gallery_id = media_id
|
||||||
|
|
||||||
|
result = self._imgur_result(gallery_id)
|
||||||
|
info['_type'] = 'url_transparent'
|
||||||
|
result.update(info)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class ImgurGalleryIE(ImgurGalleryBaseIE):
|
||||||
IE_NAME = 'imgur:gallery'
|
IE_NAME = 'imgur:gallery'
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
@ -106,49 +272,93 @@ class ImgurGalleryIE(InfoExtractor):
|
|||||||
'title': 'Adding faces make every GIF better',
|
'title': 'Adding faces make every GIF better',
|
||||||
},
|
},
|
||||||
'playlist_count': 25,
|
'playlist_count': 25,
|
||||||
|
'skip': 'Zoinks! You\'ve taken a wrong turn.',
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: static images - replace with animated/video gallery
|
||||||
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
'url': 'http://imgur.com/topic/Aww/ll5Vk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://imgur.com/gallery/YcAQlkx',
|
'url': 'https://imgur.com/gallery/YcAQlkx',
|
||||||
|
'add_ies': ['Imgur'],
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'YcAQlkx',
|
'id': 'YcAQlkx',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
|
||||||
}
|
'timestamp': 1358554297,
|
||||||
|
'upload_date': '20130119',
|
||||||
|
'uploader_id': '1648642',
|
||||||
|
'uploader': 'wittyusernamehere',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# TODO: static image - replace with animated/video gallery
|
||||||
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
'url': 'http://imgur.com/topic/Funny/N8rOudd',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||||
'only_matching': True,
|
'add_ies': ['Imgur'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VQcQPhM',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The boss is here',
|
||||||
|
'timestamp': 1476494751,
|
||||||
|
'upload_date': '20161015',
|
||||||
|
'uploader_id': '19138530',
|
||||||
|
'uploader': 'thematrixcam',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
# from PR #16674
|
||||||
|
{
|
||||||
|
'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6lAn9VQ',
|
||||||
|
'title': 'Penguins !',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
|
||||||
|
'add_ies': ['Imgur'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ZVMv45i',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Intruder',
|
||||||
|
'timestamp': 1528129683,
|
||||||
|
'upload_date': '20180604',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/t/unmuted/wXSK0YH',
|
||||||
|
'add_ies': ['Imgur'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'JCAP4io',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 're:I got the blues$',
|
||||||
|
'description': 'Luka’s vocal stylings.\n\nFP edit: don’t encourage me. I’ll never stop posting Luka and friends.',
|
||||||
|
'timestamp': 1527809525,
|
||||||
|
'upload_date': '20180531',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
gallery_id = self._match_id(url)
|
|
||||||
|
|
||||||
data = self._download_json(
|
class ImgurAlbumIE(ImgurGalleryBaseIE):
|
||||||
'https://imgur.com/gallery/%s.json' % gallery_id,
|
|
||||||
gallery_id)['data']['image']
|
|
||||||
|
|
||||||
if data.get('is_album'):
|
|
||||||
entries = [
|
|
||||||
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
|
|
||||||
for image in data['album_images']['images'] if image.get('hash')]
|
|
||||||
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
|
|
||||||
|
|
||||||
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
|
|
||||||
|
|
||||||
|
|
||||||
class ImgurAlbumIE(ImgurGalleryIE):
|
|
||||||
IE_NAME = 'imgur:album'
|
IE_NAME = 'imgur:album'
|
||||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
_GALLERY = False
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# TODO: only static images - replace with animated/video gallery
|
||||||
'url': 'http://imgur.com/a/j6Orj',
|
'url': 'http://imgur.com/a/j6Orj',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': 'j6Orj',
|
|
||||||
'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
|
|
||||||
},
|
},
|
||||||
'playlist_count': 12,
|
# from PR #21693
|
||||||
|
{
|
||||||
|
'url': 'https://imgur.com/a/iX265HX',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'iX265HX',
|
||||||
|
'title': 'enen-no-shouboutai'
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://imgur.com/a/8pih2Ed',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8pih2Ed'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
}]
|
}]
|
||||||
|
@ -59,7 +59,7 @@ class ITVBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _vanilla_ua_header():
|
def _vanilla_ua_header():
|
||||||
return {'User-agent': 'Mozilla/5.0'}
|
return {'User-Agent': 'Mozilla/5.0'}
|
||||||
|
|
||||||
def _download_webpage_handle(self, url, video_id, *args, **kwargs):
|
def _download_webpage_handle(self, url, video_id, *args, **kwargs):
|
||||||
# specialised to (a) use vanilla UA (b) detect geo-block
|
# specialised to (a) use vanilla UA (b) detect geo-block
|
||||||
@ -69,7 +69,7 @@ class ITVBaseIE(InfoExtractor):
|
|||||||
'user_agent' not in params
|
'user_agent' not in params
|
||||||
and not any(re.match(r'(?i)user-agent\s*:', h)
|
and not any(re.match(r'(?i)user-agent\s*:', h)
|
||||||
for h in (params.get('headers') or []))
|
for h in (params.get('headers') or []))
|
||||||
and 'User-agent' not in (kwargs.get('headers') or {})):
|
and 'User-Agent' not in (kwargs.get('headers') or {})):
|
||||||
|
|
||||||
kwargs.setdefault('headers', {})
|
kwargs.setdefault('headers', {})
|
||||||
kwargs['headers'] = self._vanilla_ua_header()
|
kwargs['headers'] = self._vanilla_ua_header()
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
@ -10,7 +11,7 @@ from ..compat import (
|
|||||||
compat_ord,
|
compat_ord,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_zip
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -24,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor):
|
|||||||
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
|
def _call_api(self, object_type, object_fields, display_id, username, slug=None):
|
||||||
lookup_key = object_type + 'Lookup'
|
lookup_key = object_type + 'Lookup'
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://www.mixcloud.com/graphql', display_id, query={
|
'https://app.mixcloud.com/graphql', display_id, query={
|
||||||
'query': '''{
|
'query': '''{
|
||||||
%s(lookup: {username: "%s"%s}) {
|
%s(lookup: {username: "%s"%s}) {
|
||||||
%s
|
%s
|
||||||
@ -44,7 +45,7 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Cryptkeeper',
|
'title': 'Cryptkeeper',
|
||||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||||
'uploader': 'Daniel Holbach',
|
'uploader': 'dholbach', # was: 'Daniel Holbach',
|
||||||
'uploader_id': 'dholbach',
|
'uploader_id': 'dholbach',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@ -57,7 +58,7 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
|
'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
'description': r're:Last week Dan Snaith aka Caribou swung by the Brownswood.{136}',
|
||||||
'uploader': 'Gilles Peterson Worldwide',
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
'uploader_id': 'gillespeterson',
|
'uploader_id': 'gillespeterson',
|
||||||
'thumbnail': 're:https?://.*',
|
'thumbnail': 're:https?://.*',
|
||||||
@ -65,6 +66,23 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
'timestamp': 1422987057,
|
'timestamp': 1422987057,
|
||||||
'upload_date': '20150203',
|
'upload_date': '20150203',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': '404 not found',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mixcloud.com/gillespeterson/carnival-m%C3%BAsica-popular-brasileira-mix/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gillespeterson_carnival-música-popular-brasileira-mix',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': 'Carnival Música Popular Brasileira Mix',
|
||||||
|
'description': r're:Gilles was recently in Brazil to play at Boiler Room.{208}',
|
||||||
|
'timestamp': 1454347174,
|
||||||
|
'upload_date': '20160201',
|
||||||
|
'uploader': 'Gilles Peterson Worldwide',
|
||||||
|
'uploader_id': 'gillespeterson',
|
||||||
|
'thumbnail': 're:https?://.*',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -76,10 +94,10 @@ class MixcloudIE(MixcloudBaseIE):
|
|||||||
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
|
||||||
return ''.join([
|
return ''.join([
|
||||||
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
compat_chr(compat_ord(ch) ^ compat_ord(k))
|
||||||
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
|
for ch, k in zip(ciphertext, itertools.cycle(key))])
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
username, slug = re.match(self._VALID_URL, url).groups()
|
username, slug = self._match_valid_url(url).groups()
|
||||||
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
|
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
|
||||||
track_id = '%s_%s' % (username, slug)
|
track_id = '%s_%s' % (username, slug)
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import subprocess
|
|||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_open as open,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
)
|
)
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -8,7 +8,7 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
|||||||
}
|
}
|
||||||
name'''
|
name'''
|
||||||
|
|
||||||
@ classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
||||||
|
|
||||||
@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
|||||||
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
|
for music in traverse_obj(artist, (
|
||||||
|
'musics', 'nodes', lambda _, m: m['musicID'])):
|
||||||
yield self._parse_music(music)
|
yield self._parse_music(music)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
|
|||||||
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
||||||
'description': 'md5:7043342c09a224598e93546e98e49282',
|
'description': 'md5:7043342c09a224598e93546e98e49282',
|
||||||
'upload_date': '20161107',
|
'upload_date': '20161107',
|
||||||
'uploader_id': 'maiaramaraisaoficial',
|
'uploader_id': '@maiaramaraisaoficial',
|
||||||
'uploader': 'Maiara e Maraisa',
|
'uploader': 'Maiara e Maraisa',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
124
youtube_dl/extractor/s4c.py
Normal file
124
youtube_dl/extractor/s4c.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from functools import partial as partial_f
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class S4CIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.s4c.cymru/clic/programme/861362209',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '861362209',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Y Swn',
|
||||||
|
'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0',
|
||||||
|
'duration': 5340,
|
||||||
|
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.s4c.cymru/clic/programme/856636948',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '856636948',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Am Dro',
|
||||||
|
'duration': 2880,
|
||||||
|
'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe',
|
||||||
|
'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
details = self._download_json(
|
||||||
|
'https://www.s4c.cymru/df/full_prog_details',
|
||||||
|
video_id, query={
|
||||||
|
'lang': 'e',
|
||||||
|
'programme_id': video_id,
|
||||||
|
}, fatal=False)
|
||||||
|
|
||||||
|
player_config = self._download_json(
|
||||||
|
'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={
|
||||||
|
'programme_id': video_id,
|
||||||
|
'signed': '0',
|
||||||
|
'lang': 'en',
|
||||||
|
'mode': 'od',
|
||||||
|
'appId': 'clic',
|
||||||
|
'streamName': '',
|
||||||
|
}, note='Downloading player config JSON')
|
||||||
|
|
||||||
|
m3u8_url = self._download_json(
|
||||||
|
'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={
|
||||||
|
'mode': 'od',
|
||||||
|
'application': 'clic',
|
||||||
|
'region': 'WW',
|
||||||
|
'extra': 'false',
|
||||||
|
'thirdParty': 'false',
|
||||||
|
'filename': player_config['filename'],
|
||||||
|
}, note='Downloading streaming urls JSON')['hls']
|
||||||
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))):
|
||||||
|
subtitles.setdefault(sub.get('3', 'en'), []).append({
|
||||||
|
'url': sub['0'],
|
||||||
|
'name': sub.get('1'),
|
||||||
|
})
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'thumbnail': url_or_none(player_config.get('poster')),
|
||||||
|
}, traverse_obj(details, ('full_prog_details', 0, {
|
||||||
|
'title': (('programme_title', 'series_title'), T(txt_or_none)),
|
||||||
|
'description': ('full_billing', T(txt_or_none)),
|
||||||
|
'duration': ('duration', T(partial_f(float_or_none, invscale=60))),
|
||||||
|
}), get_all=False),
|
||||||
|
rev=True)
|
||||||
|
|
||||||
|
|
||||||
|
class S4CSeriesIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.s4c.cymru/clic/series/864982911',
|
||||||
|
'playlist_mincount': 6,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '864982911',
|
||||||
|
'title': 'Iaith ar Daith',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.s4c.cymru/clic/series/866852587',
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '866852587',
|
||||||
|
'title': 'FFIT Cymru',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
series_id = self._match_id(url)
|
||||||
|
series_details = self._download_json(
|
||||||
|
'https://www.s4c.cymru/df/series_details', series_id, query={
|
||||||
|
'lang': 'e',
|
||||||
|
'series_id': series_id,
|
||||||
|
'show_prog_in_series': 'Y'
|
||||||
|
}, note='Downloading series details JSON')
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
(self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id)
|
||||||
|
for episode_id in traverse_obj(series_details, ('other_progs_in_series', Ellipsis, 'id'))),
|
||||||
|
playlist_id=series_id, playlist_title=traverse_obj(
|
||||||
|
series_details, ('full_prog_details', 0, 'series_title', T(txt_or_none))))
|
@ -3,17 +3,23 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TelewebionIE(InfoExtractor):
|
class TelewebionIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/(episode|clip)/(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.telewebion.com/#!/episode/1263668/',
|
'url': 'http://www.telewebion.com/episode/0x1b3139c/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1263668',
|
'id': '0x1b3139c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
|
'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://static\.telewebion\.com/episodeImages/.*/default',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -25,31 +31,24 @@ class TelewebionIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
secure_token = self._download_webpage(
|
episode_details = self._download_json('https://gateway.telewebion.ir/kandoo/episode/getEpisodeDetail/?EpisodeId={0}'.format(video_id), video_id)
|
||||||
'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
|
episode_details = episode_details['body']['queryEpisode'][0]
|
||||||
episode_details = self._download_json(
|
|
||||||
'http://m.s2.telewebion.com/op/op', video_id,
|
|
||||||
query={'action': 'getEpisodeDetails', 'episode_id': video_id})
|
|
||||||
|
|
||||||
m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
|
channel_id = episode_details['channel']['descriptor']
|
||||||
video_id, episode_details['file_path'], secure_token)
|
episode_image_id = episode_details.get('image')
|
||||||
|
episode_image = 'https://static.telewebion.com/episodeImages/{0}/default'.format(episode_image_id) if episode_image_id else None
|
||||||
|
|
||||||
|
m3u8_url = 'https://cdna.telewebion.com/{0}/episode/{1}/playlist.m3u8'.format(channel_id, video_id)
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls')
|
m3u8_url, video_id, ext='mp4', m3u8_id='hls',
|
||||||
|
entry_protocol='m3u8_native')
|
||||||
picture_paths = [
|
self._sort_formats(formats)
|
||||||
episode_details.get('picture_path'),
|
|
||||||
episode_details.get('large_picture_path'),
|
|
||||||
]
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': picture_path,
|
|
||||||
'preference': idx,
|
|
||||||
} for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': episode_details['title'],
|
'title': episode_details['title'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnails': thumbnails,
|
'thumbnail': url_or_none(episode_image),
|
||||||
'view_count': episode_details.get('view_count'),
|
'view_count': int_or_none(episode_details.get('view_count')),
|
||||||
|
'duration': float_or_none(episode_details.get('duration')),
|
||||||
}
|
}
|
||||||
|
@ -2,9 +2,22 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError
|
from ..compat import compat_kwargs
|
||||||
|
from ..utils import (
|
||||||
|
base_url,
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
txt_or_none,
|
||||||
|
url_basename,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Vbox7IE(InfoExtractor):
|
class Vbox7IE(InfoExtractor):
|
||||||
@ -20,23 +33,27 @@ class Vbox7IE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
(?P<id>[\da-fA-F]+)
|
(?P<id>[\da-fA-F]+)
|
||||||
'''
|
'''
|
||||||
|
_EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
|
||||||
_GEO_COUNTRIES = ['BG']
|
_GEO_COUNTRIES = ['BG']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://vbox7.com/play:0946fff23c',
|
# the http: URL just redirects here
|
||||||
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
|
'url': 'https://vbox7.com/play:0946fff23c',
|
||||||
|
'md5': '50ca1f78345a9c15391af47d8062d074',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0946fff23c',
|
'id': '0946fff23c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Борисов: Притеснен съм за бъдещето на България',
|
'title': 'Борисов: Притеснен съм за бъдещето на България',
|
||||||
'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
|
'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'timestamp': 1470982814,
|
'timestamp': 1470982814,
|
||||||
'upload_date': '20160812',
|
'upload_date': '20160812',
|
||||||
'uploader': 'zdraveibulgaria',
|
'uploader': 'zdraveibulgaria',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 2640,
|
||||||
},
|
},
|
||||||
'params': {
|
'expected_warnings': [
|
||||||
'proxy': '127.0.0.1:8118',
|
'Unable to download webpage',
|
||||||
},
|
],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vbox7.com/play:249bb972c2',
|
'url': 'http://vbox7.com/play:249bb972c2',
|
||||||
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
'md5': '99f65c0c9ef9b682b97313e052734c3f',
|
||||||
@ -44,8 +61,15 @@ class Vbox7IE(InfoExtractor):
|
|||||||
'id': '249bb972c2',
|
'id': '249bb972c2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
'title': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||||
|
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
|
||||||
|
'timestamp': 1360215023,
|
||||||
|
'upload_date': '20130207',
|
||||||
|
'uploader': 'svideteliat_ot_varshava',
|
||||||
|
'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
'duration': 83,
|
||||||
},
|
},
|
||||||
'skip': 'georestricted',
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -54,52 +78,127 @@ class Vbox7IE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(cls, webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(cls._EMBED_REGEX[0], webpage)
|
||||||
r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
|
|
||||||
webpage)
|
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
|
# specialisation to transform what looks like ld+json that
|
||||||
|
# may contain invalid character combinations
|
||||||
|
|
||||||
|
# transform_source=None, fatal=True
|
||||||
|
def _parse_json(self, json_string, video_id, *args, **kwargs):
|
||||||
|
if '"@context"' in json_string[:30]:
|
||||||
|
# this is ld+json, or that's the way to bet
|
||||||
|
transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
|
||||||
|
if not transform_source:
|
||||||
|
|
||||||
|
def fix_chars(src):
|
||||||
|
# fix malformed ld+json: replace raw CRLFs with escaped LFs
|
||||||
|
return re.sub(
|
||||||
|
r'"[^"]+"', lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)
|
||||||
|
|
||||||
|
if len(args) > 0:
|
||||||
|
args = (fix_chars,) + args[1:]
|
||||||
|
else:
|
||||||
|
kwargs['transform_source'] = fix_chars
|
||||||
|
kwargs = compat_kwargs(kwargs)
|
||||||
|
|
||||||
|
return super(Vbox7IE, self)._parse_json(
|
||||||
|
json_string, video_id, *args, **kwargs)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
url = 'https://vbox7.com/play:%s' % (video_id,)
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id,
|
'https://www.vbox7.com/aj/player/item/options', video_id,
|
||||||
video_id)
|
query={'vid': video_id}, headers={'Referer': url})
|
||||||
|
# estimate time to which possible `ago` member is relative
|
||||||
|
now = now + 0.5 * (time.time() - now)
|
||||||
|
|
||||||
if 'error' in response:
|
if traverse_obj(response, 'error'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||||
|
|
||||||
video = response['options']
|
src_url = traverse_obj(response, ('options', 'src', T(url_or_none))) or ''
|
||||||
|
|
||||||
title = video['title']
|
fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
|
||||||
video_url = video['src']
|
if fmt_base in ('na', 'vn'):
|
||||||
|
|
||||||
if '/na.mp4' in video_url:
|
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||||
|
|
||||||
uploader = video.get('uploader')
|
ext = determine_ext(src_url)
|
||||||
|
if ext == 'mpd':
|
||||||
|
# extract MPD
|
||||||
|
try:
|
||||||
|
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||||
|
src_url, video_id, 'dash', fatal=False)
|
||||||
|
except KeyError: # fatal doesn't catch this
|
||||||
|
self.report_warning('Failed to parse MPD manifest')
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
elif ext != 'm3u8':
|
||||||
|
formats = [{
|
||||||
|
'url': src_url,
|
||||||
|
}] if src_url else []
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
if src_url:
|
||||||
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None)
|
# possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100
|
||||||
|
fmt_base = base_url(src_url) + fmt_base
|
||||||
|
# prepare for _extract_m3u8_formats_and_subtitles()
|
||||||
|
# hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
hls_formats = self._extract_m3u8_formats(
|
||||||
|
'{0}.m3u8'.format(fmt_base), video_id, m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(hls_formats)
|
||||||
|
# self._merge_subtitles(hls_subs, target=subtitles)
|
||||||
|
|
||||||
info = {}
|
# In case MPD/HLS cannot be parsed, or anyway, get mp4 combined
|
||||||
|
# formats usually provided to Safari, iOS, and old Windows
|
||||||
|
video = response['options']
|
||||||
|
resolutions = (1080, 720, 480, 240, 144)
|
||||||
|
highest_res = traverse_obj(video, (
|
||||||
|
'highestRes', T(int))) or resolutions[0]
|
||||||
|
resolutions = traverse_obj(video, (
|
||||||
|
'resolutions', lambda _, r: highest_res >= int(r) > 0)) or resolutions
|
||||||
|
mp4_formats = traverse_obj(resolutions, (
|
||||||
|
Ellipsis, T(lambda res: {
|
||||||
|
'url': '{0}_{1}.mp4'.format(fmt_base, res),
|
||||||
|
'format_id': 'http-{0}'.format(res),
|
||||||
|
'height': res,
|
||||||
|
})))
|
||||||
|
# if above formats are flaky, enable the line below
|
||||||
|
# self._check_formats(mp4_formats, video_id)
|
||||||
|
formats.extend(mp4_formats)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id, fatal=False) or ''
|
||||||
|
|
||||||
if webpage:
|
|
||||||
info = self._search_json_ld(
|
info = self._search_json_ld(
|
||||||
webpage.replace('"/*@context"', '"@context"'), video_id,
|
webpage.replace('"/*@context"', '"@context"'), video_id,
|
||||||
fatal=False)
|
fatal=False) if webpage else {}
|
||||||
|
|
||||||
info.update({
|
if not info.get('title'):
|
||||||
|
info['title'] = traverse_obj(response, (
|
||||||
|
'options', 'title', T(txt_or_none))) or self._og_search_title(webpage)
|
||||||
|
|
||||||
|
def if_missing(k):
|
||||||
|
return lambda x: None if k in info else x
|
||||||
|
|
||||||
|
info = merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'formats': formats,
|
||||||
'url': video_url,
|
'subtitles': subtitles or None,
|
||||||
'uploader': uploader,
|
}, info, traverse_obj(response, ('options', {
|
||||||
'thumbnail': self._proto_relative_url(
|
'uploader': ('uploader', T(txt_or_none)),
|
||||||
|
'timestamp': ('ago', T(if_missing('timestamp')), T(lambda t: int(round((now - t) / 60.0)) * 60)),
|
||||||
|
'duration': ('duration', T(if_missing('duration')), T(float_or_none)),
|
||||||
|
})))
|
||||||
|
if 'thumbnail' not in info:
|
||||||
|
info['thumbnail'] = self._proto_relative_url(
|
||||||
info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
info.get('thumbnail') or self._og_search_thumbnail(webpage),
|
||||||
'http:'),
|
'https:'),
|
||||||
})
|
|
||||||
return info
|
return info
|
||||||
|
@ -6,22 +6,31 @@ import re
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_b64decode,
|
||||||
|
compat_ord,
|
||||||
|
compat_struct_pack,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
|
parse_qs,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
urljoin,
|
urljoin,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
|
||||||
compat_b64decode,
|
|
||||||
compat_ord,
|
def compat_random_choices(population, *args, **kwargs):
|
||||||
compat_struct_pack,
|
# weights=None, *, cum_weights=None, k=1
|
||||||
compat_urlparse,
|
# limited implementation needed here
|
||||||
)
|
weights = args[0] if args else kwargs.get('weights')
|
||||||
|
assert all(w is None for w in (weights, kwargs.get('cum_weights')))
|
||||||
|
k = kwargs.get('k', 1)
|
||||||
|
return ''.join(random.choice(population) for _ in range(k))
|
||||||
|
|
||||||
|
|
||||||
class VideaIE(InfoExtractor):
|
class VideaIE(InfoExtractor):
|
||||||
@ -35,6 +44,7 @@ class VideaIE(InfoExtractor):
|
|||||||
)
|
)
|
||||||
(?P<id>[^?#&]+)
|
(?P<id>[^?#&]+)
|
||||||
'''
|
'''
|
||||||
|
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
|
'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
|
||||||
'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
|
'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
|
||||||
@ -44,6 +54,7 @@ class VideaIE(InfoExtractor):
|
|||||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 21,
|
'duration': 21,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
||||||
@ -54,6 +65,7 @@ class VideaIE(InfoExtractor):
|
|||||||
'title': 'Supercars előzés',
|
'title': 'Supercars előzés',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 64,
|
'duration': 64,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
||||||
@ -64,6 +76,7 @@ class VideaIE(InfoExtractor):
|
|||||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 21,
|
'duration': 21,
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||||
@ -80,11 +93,14 @@ class VideaIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
|
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(cls, webpage):
|
||||||
return [url for _, url in re.findall(
|
def yield_urls():
|
||||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
|
for pattern in cls._EMBED_REGEX:
|
||||||
webpage)]
|
for m in re.finditer(pattern, webpage):
|
||||||
|
yield m.group('url')
|
||||||
|
|
||||||
|
return list(yield_urls())
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def rc4(cipher_text, key):
|
def rc4(cipher_text, key):
|
||||||
@ -130,13 +146,13 @@ class VideaIE(InfoExtractor):
|
|||||||
for i in range(0, 32):
|
for i in range(0, 32):
|
||||||
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
||||||
|
|
||||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
|
query = parse_qs(player_url)
|
||||||
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
|
random_seed = ''.join(compat_random_choices(string.ascii_letters + string.digits, k=8))
|
||||||
query['_s'] = random_seed
|
query['_s'] = random_seed
|
||||||
query['_t'] = result[:16]
|
query['_t'] = result[:16]
|
||||||
|
|
||||||
b64_info, handle = self._download_webpage_handle(
|
b64_info, handle = self._download_webpage_handle(
|
||||||
'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
|
'http://videa.hu/player/xml', video_id, query=query)
|
||||||
if b64_info.startswith('<?xml'):
|
if b64_info.startswith('<?xml'):
|
||||||
info = self._parse_xml(b64_info, video_id)
|
info = self._parse_xml(b64_info, video_id)
|
||||||
else:
|
else:
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
@ -11,6 +12,7 @@ from ..utils import (
|
|||||||
strip_or_none,
|
strip_or_none,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
str_to_int,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -35,6 +37,26 @@ class VidLiiIE(InfoExtractor):
|
|||||||
'categories': ['News & Politics'],
|
'categories': ['News & Politics'],
|
||||||
'tags': ['Vidlii', 'Jan', 'Videogames'],
|
'tags': ['Vidlii', 'Jan', 'Videogames'],
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# HD
|
||||||
|
'url': 'https://www.vidlii.com/watch?v=2Ng8Abj2Fkl',
|
||||||
|
'md5': '450e7da379c884788c3a4fa02a3ce1a4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2Ng8Abj2Fkl',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'test',
|
||||||
|
'description': 'md5:cc55a86032a7b6b3cbfd0f6b155b52e9',
|
||||||
|
'thumbnail': 'https://www.vidlii.com/usfi/thmp/2Ng8Abj2Fkl.jpg',
|
||||||
|
'uploader': 'VidLii',
|
||||||
|
'uploader_url': 'https://www.vidlii.com/user/VidLii',
|
||||||
|
'upload_date': '20200927',
|
||||||
|
'duration': 5,
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'average_rating': float,
|
||||||
|
'categories': ['Film & Animation'],
|
||||||
|
'tags': list,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
|
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -46,11 +68,32 @@ class VidLiiIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
|
'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
formats = []
|
||||||
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
|
|
||||||
'video url', group='url')
|
|
||||||
|
|
||||||
title = self._search_regex(
|
def add_format(format_url, height=None):
|
||||||
|
height = int(self._search_regex(r'(\d+)\.mp4',
|
||||||
|
format_url, 'height', default=360))
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%dp' % height if height else None,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
|
||||||
|
sources = re.findall(
|
||||||
|
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
if len(sources) > 1:
|
||||||
|
add_format(sources[1][1])
|
||||||
|
self._check_formats(formats, video_id)
|
||||||
|
if len(sources) > 0:
|
||||||
|
add_format(sources[0][1])
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = self._html_search_regex(
|
||||||
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
|
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
|
||||||
'title')
|
'title')
|
||||||
|
|
||||||
@ -82,9 +125,9 @@ class VidLiiIE(InfoExtractor):
|
|||||||
default=None) or self._search_regex(
|
default=None) or self._search_regex(
|
||||||
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
view_count = int_or_none(self._search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
(r'<strong>(\d+)</strong> views',
|
(r'<strong>([\d,.]+)</strong> views',
|
||||||
r'Views\s*:\s*<strong>(\d+)</strong>'),
|
r'Views\s*:\s*<strong>([\d,.]+)</strong>'),
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
|
|
||||||
comment_count = int_or_none(self._search_regex(
|
comment_count = int_or_none(self._search_regex(
|
||||||
@ -109,7 +152,7 @@ class VidLiiIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': video_url,
|
'formats': formats,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -673,8 +673,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
if '//player.vimeo.com/video/' in url:
|
if '//player.vimeo.com/video/' in url:
|
||||||
config = self._parse_json(self._search_regex(
|
config = self._search_json(
|
||||||
r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
|
r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
|
||||||
if config.get('view') == 4:
|
if config.get('view') == 4:
|
||||||
config = self._verify_player_video_password(
|
config = self._verify_player_video_password(
|
||||||
redirect_url, video_id, headers)
|
redirect_url, video_id, headers)
|
||||||
|
55
youtube_dl/extractor/whyp.py
Normal file
55
youtube_dl/extractor/whyp.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
str_or_none,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WhypIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
|
||||||
|
'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
|
||||||
|
'info_dict': {
|
||||||
|
'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
|
||||||
|
'id': '18337',
|
||||||
|
'title': 'Home Page Example Track',
|
||||||
|
'description': r're:(?s).+\bexample track\b',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 52.82,
|
||||||
|
'uploader': 'Brad',
|
||||||
|
'uploader_id': '1',
|
||||||
|
'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.whyp.it/tracks/18337',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
unique_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, unique_id)
|
||||||
|
data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'url': data['audio_url'],
|
||||||
|
'id': unique_id,
|
||||||
|
}, traverse_obj(data, {
|
||||||
|
'title': 'title',
|
||||||
|
'description': 'description',
|
||||||
|
'duration': ('duration', T(float_or_none)),
|
||||||
|
'uploader': ('user', 'username'),
|
||||||
|
'uploader_id': ('user', 'id', T(str_or_none)),
|
||||||
|
'thumbnail': ('artwork_url', T(url_or_none)),
|
||||||
|
}), {
|
||||||
|
'ext': 'mp3',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'http_headers': {'Referer': 'https://whyp.it/'},
|
||||||
|
}, rev=True)
|
@ -4,20 +4,28 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_chr
|
from ..compat import (
|
||||||
|
compat_chr,
|
||||||
|
compat_zip as zip,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
decode_packed_codes,
|
decode_packed_codes,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
merge_dicts,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
|
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
|
||||||
def aa_decode(aa_code):
|
def aa_decode(aa_code):
|
||||||
symbol_table = [
|
symbol_table = (
|
||||||
('7', '((゚ー゚) + (o^_^o))'),
|
('7', '((゚ー゚) + (o^_^o))'),
|
||||||
('6', '((o^_^o) +(o^_^o))'),
|
('6', '((o^_^o) +(o^_^o))'),
|
||||||
('5', '((゚ー゚) + (゚Θ゚))'),
|
('5', '((゚ー゚) + (゚Θ゚))'),
|
||||||
@ -26,84 +34,180 @@ def aa_decode(aa_code):
|
|||||||
('3', '(o^_^o)'),
|
('3', '(o^_^o)'),
|
||||||
('1', '(゚Θ゚)'),
|
('1', '(゚Θ゚)'),
|
||||||
('0', '(c^_^o)'),
|
('0', '(c^_^o)'),
|
||||||
]
|
('+', ''),
|
||||||
|
)
|
||||||
delim = '(゚Д゚)[゚ε゚]+'
|
delim = '(゚Д゚)[゚ε゚]+'
|
||||||
ret = ''
|
|
||||||
for aa_char in aa_code.split(delim):
|
def chr_from_code(c):
|
||||||
for val, pat in symbol_table:
|
for val, pat in symbol_table:
|
||||||
aa_char = aa_char.replace(pat, val)
|
c = c.replace(pat, val)
|
||||||
aa_char = aa_char.replace('+ ', '')
|
if c.startswith(('u', 'U')):
|
||||||
m = re.match(r'^\d+', aa_char)
|
base = 16
|
||||||
if m:
|
c = c[1:]
|
||||||
ret += compat_chr(int(m.group(0), 8))
|
|
||||||
else:
|
else:
|
||||||
m = re.match(r'^u([\da-f]+)', aa_char)
|
base = 10
|
||||||
if m:
|
c = int_or_none(c, base=base)
|
||||||
ret += compat_chr(int(m.group(1), 16))
|
return '' if c is None else compat_chr(c)
|
||||||
return ret
|
|
||||||
|
return ''.join(
|
||||||
|
chr_from_code(aa_char)
|
||||||
|
for aa_char in aa_code.split(delim))
|
||||||
|
|
||||||
|
|
||||||
class XFileShareIE(InfoExtractor):
|
class XFileShareIE(InfoExtractor):
|
||||||
_SITES = (
|
_SITES = (
|
||||||
(r'aparat\.cam', 'Aparat'),
|
# status check 2024-02: site availability, G site: search
|
||||||
(r'clipwatching\.com', 'ClipWatching'),
|
(r'aparat\.cam', 'Aparat'), # Cloudflare says host error 522, apparently changed to wolfstreeam.tv
|
||||||
(r'gounlimited\.to', 'GoUnlimited'),
|
(r'filemoon\.sx/.', 'FileMoon'),
|
||||||
(r'govid\.me', 'GoVid'),
|
(r'gounlimited\.to', 'GoUnlimited'), # no media pages listed
|
||||||
(r'holavid\.com', 'HolaVid'),
|
(r'govid\.me', 'GoVid'), # no media pages listed
|
||||||
(r'streamty\.com', 'Streamty'),
|
(r'highstream\.tv', 'HighStream'), # clipwatching.com redirects here
|
||||||
(r'thevideobee\.to', 'TheVideoBee'),
|
(r'holavid\.com', 'HolaVid'), # Cloudflare says host error 522
|
||||||
(r'uqload\.com', 'Uqload'),
|
# (r'streamty\.com', 'Streamty'), # no media pages listed, connection timeout
|
||||||
(r'vidbom\.com', 'VidBom'),
|
# (r'thevideobee\.to', 'TheVideoBee'), # no pages listed, refuses connection
|
||||||
(r'vidlo\.us', 'vidlo'),
|
(r'uqload\.to', 'Uqload'), # .com, .co redirect here
|
||||||
(r'vidlocker\.xyz', 'VidLocker'),
|
(r'(?:vedbam\.xyz|vadbam.net)', 'V?dB?m'), # vidbom.com redirects here, but no valid media pages listed
|
||||||
(r'vidshare\.tv', 'VidShare'),
|
(r'vidlo\.us', 'vidlo'), # no valid media pages listed
|
||||||
(r'vup\.to', 'VUp'),
|
(r'vidlocker\.xyz', 'VidLocker'), # no media pages listed
|
||||||
|
(r'(?:w\d\.)?viidshar\.com', 'VidShare'), # vidshare.tv redirects here
|
||||||
|
# (r'vup\.to', 'VUp'), # domain not found
|
||||||
(r'wolfstream\.tv', 'WolfStream'),
|
(r'wolfstream\.tv', 'WolfStream'),
|
||||||
(r'xvideosharing\.com', 'XVideoSharing'),
|
(r'xvideosharing\.com', 'XVideoSharing'), # just started showing 'maintenance mode'
|
||||||
)
|
)
|
||||||
|
|
||||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
IE_DESC = 'XFileShare-based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||||
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||||
% '|'.join(site for site in list(zip(*_SITES))[0]))
|
% '|'.join(site for site in list(zip(*_SITES))[0]))
|
||||||
|
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
|
||||||
|
|
||||||
_FILE_NOT_FOUND_REGEXES = (
|
_FILE_NOT_FOUND_REGEXES = (
|
||||||
r'>(?:404 - )?File Not Found<',
|
r'>(?:404 - )?File Not Found<',
|
||||||
r'>The file was removed by administrator<',
|
r'>The file was removed by administrator<',
|
||||||
)
|
)
|
||||||
|
_TITLE_REGEXES = (
|
||||||
|
r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||||
|
r'<td nowrap>([^<]+)</td>',
|
||||||
|
r'h4-fine[^>]*>([^<]+)<',
|
||||||
|
r'>Watch (.+)[ <]',
|
||||||
|
r'<h2 class="video-page-head">([^<]+)</h2>',
|
||||||
|
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to (dead)
|
||||||
|
r'title\s*:\s*"([^"]+)"', # govid.me
|
||||||
|
)
|
||||||
|
_SOURCE_URL_REGEXES = (
|
||||||
|
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||||
|
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
||||||
|
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
||||||
|
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
||||||
|
)
|
||||||
|
_THUMBNAIL_REGEXES = (
|
||||||
|
r'<video[^>]+poster="([^"]+)"',
|
||||||
|
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
|
||||||
|
)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://xvideosharing.com/fq65f94nd2ve',
|
'note': 'link in `sources`',
|
||||||
'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
|
'url': 'https://uqload.to/dcsu06gdb45o',
|
||||||
|
'md5': '7f8db187b254379440bf4fcad094ae86',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'fq65f94nd2ve',
|
'id': 'dcsu06gdb45o',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'sample',
|
'title': 'f2e31015957e74c8c8427982e161c3fc mp4',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https://.*\.jpg'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'nocheckcertificate': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to extract JWPlayer data'],
|
||||||
|
}, {
|
||||||
|
'note': 'link in decoded `sources`',
|
||||||
|
'url': 'https://xvideosharing.com/1tlg6agrrdgc',
|
||||||
|
'md5': '2608ce41932c1657ae56258a64e647d9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1tlg6agrrdgc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '0121',
|
||||||
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
|
},
|
||||||
|
'skip': 'This server is in maintenance mode.',
|
||||||
|
}, {
|
||||||
|
'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
|
||||||
|
'url': 'https://filemoon.sx/e/dw40rxrzruqz',
|
||||||
|
'md5': '5a713742f57ac4aef29b74733e8dda01',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dw40rxrzruqz',
|
||||||
|
'title': 'dw40rxrzruqz',
|
||||||
|
'ext': 'mp4'
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
|
||||||
|
'url': 'https://vadbam.net/6lnbkci96wly.html',
|
||||||
|
'md5': 'a1616800076177e2ac769203957c54bc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6lnbkci96wly',
|
||||||
|
'title': 'Heart Crime S01 E03 weciima autos',
|
||||||
|
'ext': 'mp4'
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'JWPlayer link in clear',
|
||||||
|
'url': 'https://w1.viidshar.com/nnibe0xf0h79.html',
|
||||||
|
'md5': 'f0a580ce9df06cc61b4a5c979d672367',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nnibe0xf0h79',
|
||||||
|
'title': 'JaGa 68ar',
|
||||||
|
'ext': 'mp4'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'ffmpeg',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['hlsnative has detected features it does not support'],
|
||||||
|
}, {
|
||||||
|
'note': 'JWPlayer link in clear',
|
||||||
|
'url': 'https://wolfstream.tv/a3drtehyrg52.html',
|
||||||
|
'md5': '1901d86a79c5e0c6a51bdc9a4cfd3769',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a3drtehyrg52',
|
||||||
|
'title': 'NFL 2023 W04 DET@GB',
|
||||||
|
'ext': 'mp4'
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://wolfstream.tv/nthme29v9u2x',
|
'url': 'https://uqload.to/ug5somm0ctnk.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://highstream.tv/2owiyz3sjoux',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://vedbam.xyz/6lnbkci96wly.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(cls, webpage):
|
||||||
return [
|
|
||||||
mobj.group('url')
|
def yield_urls():
|
||||||
for mobj in re.finditer(
|
for regex in cls._EMBED_REGEX:
|
||||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
|
for mobj in re.finditer(regex, webpage):
|
||||||
% '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
|
yield mobj.group('url')
|
||||||
webpage)]
|
|
||||||
|
return list(yield_urls())
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
host, video_id = self._match_valid_url(url).group('host', 'id')
|
||||||
|
|
||||||
url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
|
url = 'https://%s/%s' % (
|
||||||
|
host,
|
||||||
|
'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
container_div = get_element_by_id('container', webpage) or webpage
|
||||||
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
|
if self._search_regex(
|
||||||
|
r'>This server is in maintenance mode\.', container_div,
|
||||||
|
'maint error', group=0, default=None):
|
||||||
|
raise ExtractorError(clean_html(container_div), expected=True)
|
||||||
|
if self._search_regex(
|
||||||
|
self._FILE_NOT_FOUND_REGEXES, container_div,
|
||||||
|
'missing video error', group=0, default=None):
|
||||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||||
|
|
||||||
fields = self._hidden_inputs(webpage)
|
fields = self._hidden_inputs(webpage)
|
||||||
@ -122,59 +226,43 @@ class XFileShareIE(InfoExtractor):
|
|||||||
'Content-type': 'application/x-www-form-urlencoded',
|
'Content-type': 'application/x-www-form-urlencoded',
|
||||||
})
|
})
|
||||||
|
|
||||||
title = (self._search_regex(
|
title = (
|
||||||
(r'style="z-index: [0-9]+;">([^<]+)</span>',
|
self._search_regex(self._TITLE_REGEXES, webpage, 'title', default=None)
|
||||||
r'<td nowrap>([^<]+)</td>',
|
or self._og_search_title(webpage, default=None)
|
||||||
r'h4-fine[^>]*>([^<]+)<',
|
or video_id).strip()
|
||||||
r'>Watch (.+)[ <]',
|
|
||||||
r'<h2 class="video-page-head">([^<]+)</h2>',
|
|
||||||
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
|
|
||||||
r'title\s*:\s*"([^"]+)"'), # govid.me
|
|
||||||
webpage, 'title', default=None) or self._og_search_title(
|
|
||||||
webpage, default=None) or video_id).strip()
|
|
||||||
|
|
||||||
|
obf_code = True
|
||||||
|
while obf_code:
|
||||||
for regex, func in (
|
for regex, func in (
|
||||||
(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
|
(r'(?s)(?<!-)\b(eval\(function\(p,a,c,k,e,d\)\{(?:(?!</script>).)+\)\))',
|
||||||
|
decode_packed_codes),
|
||||||
(r'(゚.+)', aa_decode)):
|
(r'(゚.+)', aa_decode)):
|
||||||
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
|
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
|
||||||
if obf_code:
|
if obf_code:
|
||||||
webpage = webpage.replace(obf_code, func(obf_code))
|
webpage = webpage.replace(obf_code, func(obf_code))
|
||||||
|
break
|
||||||
|
|
||||||
formats = []
|
jwplayer_data = self._find_jwplayer_data(
|
||||||
|
webpage.replace(r'\'', '\''), video_id)
|
||||||
|
result = self._parse_jwplayer_data(
|
||||||
|
jwplayer_data, video_id, require_title=False,
|
||||||
|
m3u8_id='hls', mpd_id='dash')
|
||||||
|
|
||||||
jwplayer_data = self._search_regex(
|
if not traverse_obj(result, 'formats'):
|
||||||
[
|
|
||||||
r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
|
|
||||||
r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
|
|
||||||
], webpage,
|
|
||||||
'jwplayer data', default=None)
|
|
||||||
if jwplayer_data:
|
if jwplayer_data:
|
||||||
jwplayer_data = self._parse_json(
|
self.report_warning(
|
||||||
jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
|
'Failed to extract JWPlayer formats', video_id=video_id)
|
||||||
if jwplayer_data:
|
urls = set()
|
||||||
formats = self._parse_jwplayer_data(
|
for regex in self._SOURCE_URL_REGEXES:
|
||||||
jwplayer_data, video_id, False,
|
|
||||||
m3u8_id='hls', mpd_id='dash')['formats']
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
urls = []
|
|
||||||
for regex in (
|
|
||||||
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
|
|
||||||
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
|
|
||||||
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
|
|
||||||
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
|
|
||||||
for mobj in re.finditer(regex, webpage):
|
for mobj in re.finditer(regex, webpage):
|
||||||
video_url = mobj.group('url')
|
urls.add(mobj.group('url'))
|
||||||
if video_url not in urls:
|
|
||||||
urls.append(video_url)
|
|
||||||
|
|
||||||
sources = self._search_regex(
|
sources = self._search_regex(
|
||||||
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
|
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
|
||||||
if sources:
|
urls.update(traverse_obj(sources, (T(lambda s: self._parse_json(s, video_id)), Ellipsis)))
|
||||||
urls.extend(self._parse_json(sources, video_id))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for video_url in urls:
|
for video_url in traverse_obj(urls, (Ellipsis, T(url_or_none))):
|
||||||
if determine_ext(video_url) == 'm3u8':
|
if determine_ext(video_url) == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4',
|
video_url, video_id, 'mp4',
|
||||||
@ -185,17 +273,19 @@ class XFileShareIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': 'sd',
|
'format_id': 'sd',
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
result = {'formats': formats}
|
||||||
|
|
||||||
|
self._sort_formats(result['formats'])
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
[
|
self._THUMBNAIL_REGEXES, webpage, 'thumbnail', default=None)
|
||||||
r'<video[^>]+poster="([^"]+)"',
|
|
||||||
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
|
|
||||||
], webpage, 'thumbnail', default=None)
|
|
||||||
|
|
||||||
return {
|
if not (title or result.get('title')):
|
||||||
|
title = self._generic_title(url) or video_id
|
||||||
|
|
||||||
|
return merge_dicts(result, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title or None,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'formats': formats,
|
'http_headers': {'Referer': url}
|
||||||
}
|
})
|
||||||
|
@ -106,6 +106,25 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://music.yandex.com/album/540508/track/4878838',
|
'url': 'http://music.yandex.com/album/540508/track/4878838',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://music.yandex.ru/album/16302456/track/85430762',
|
||||||
|
'md5': '11b8d50ab03b57738deeaadf661a0a48',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '85430762',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'abr': 128,
|
||||||
|
'title': 'Haddadi Von Engst, Phonic Youth, Super Flu - Til The End (Super Flu Remix)',
|
||||||
|
'filesize': int,
|
||||||
|
'duration': 431.14,
|
||||||
|
'track': 'Til The End (Super Flu Remix)',
|
||||||
|
'album': 'Til The End',
|
||||||
|
'album_artist': 'Haddadi Von Engst, Phonic Youth',
|
||||||
|
'artist': 'Haddadi Von Engst, Phonic Youth, Super Flu',
|
||||||
|
'release_year': 2021,
|
||||||
|
'genre': 'house',
|
||||||
|
'disc_number': 1,
|
||||||
|
'track_number': 2,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -116,10 +135,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
|||||||
'track', tld, url, track_id, 'Downloading track JSON',
|
'track', tld, url, track_id, 'Downloading track JSON',
|
||||||
{'track': '%s:%s' % (track_id, album_id)})['track']
|
{'track': '%s:%s' % (track_id, album_id)})['track']
|
||||||
track_title = track['title']
|
track_title = track['title']
|
||||||
|
track_version = track.get('version')
|
||||||
|
if track_version:
|
||||||
|
track_title = '%s (%s)' % (track_title, track_version)
|
||||||
|
|
||||||
download_data = self._download_json(
|
download_data = self._download_json(
|
||||||
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
|
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
|
||||||
track_id, 'Downloading track location url JSON',
|
track_id, 'Downloading track location url JSON',
|
||||||
|
query={'hq': 1},
|
||||||
headers={'X-Retpath-Y': url})
|
headers={'X-Retpath-Y': url})
|
||||||
|
|
||||||
fd_data = self._download_json(
|
fd_data = self._download_json(
|
||||||
|
@ -1,20 +1,38 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
merge_dicts,
|
||||||
|
parse_count,
|
||||||
|
parse_qs,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class YouPornIE(InfoExtractor):
|
class YouPornIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
|
_VALID_URL = (
|
||||||
|
r'youporn:(?P<id>\d+)',
|
||||||
|
r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
|
||||||
|
(?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
)
|
||||||
|
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||||
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
|
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
|
||||||
@ -34,7 +52,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'skip': 'This video has been disabled',
|
'skip': 'This video has been deactivated',
|
||||||
}, {
|
}, {
|
||||||
# Unknown uploader
|
# Unknown uploader
|
||||||
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
||||||
@ -66,57 +84,104 @@ class YouPornIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '16290308',
|
||||||
|
'age_limit': 18,
|
||||||
|
'categories': [],
|
||||||
|
'description': None, # SEO spam using title removed
|
||||||
|
'display_id': 'tinderspecial-trailer1',
|
||||||
|
'duration': 298.0,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20201123',
|
||||||
|
'uploader': 'Ersties',
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
|
||||||
|
'timestamp': 1606147564,
|
||||||
|
'title': 'Tinder In Real Life',
|
||||||
|
'view_count': int,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(cls, webpage):
|
||||||
return re.findall(
|
def yield_urls():
|
||||||
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
|
for p in cls._EMBED_REGEX:
|
||||||
webpage)
|
for m in re.finditer(p, webpage):
|
||||||
|
yield m.group('url')
|
||||||
|
|
||||||
|
return list(yield_urls())
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
# A different video ID (data-video-id) is hidden in the page but
|
||||||
video_id = mobj.group('id')
|
# never seems to be used
|
||||||
display_id = mobj.group('display_id') or video_id
|
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||||
|
url = 'http://www.youporn.com/watch/%s' % (video_id,)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
url, video_id, headers={'Cookie': 'age_verified=1'})
|
||||||
|
|
||||||
definitions = self._download_json(
|
watchable = self._search_regex(
|
||||||
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
|
r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
|
||||||
display_id)
|
webpage, 'watchability', default=None)
|
||||||
|
if not watchable:
|
||||||
|
msg = re.split(r'\s{4}', clean_html(get_element_by_id(
|
||||||
|
'mainContent', webpage)) or '')[0]
|
||||||
|
raise ExtractorError(
|
||||||
|
('%s says: %s' % (self.IE_NAME, msg))
|
||||||
|
if msg else 'Video unavailable: no reason found',
|
||||||
|
expected=True)
|
||||||
|
# internal ID ?
|
||||||
|
# video_id = extract_attributes(watchable).get('data-video-id')
|
||||||
|
|
||||||
|
playervars = self._search_json(
|
||||||
|
r'\bplayervars\s*:', webpage, 'playervars', video_id)
|
||||||
|
|
||||||
|
def get_fmt(x):
|
||||||
|
v_url = url_or_none(x.get('videoUrl'))
|
||||||
|
if v_url:
|
||||||
|
x['videoUrl'] = v_url
|
||||||
|
return (x['format'], x)
|
||||||
|
|
||||||
|
defs_by_format = dict(traverse_obj(playervars, (
|
||||||
|
'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
|
||||||
|
|
||||||
|
def get_format_data(f):
|
||||||
|
if f not in defs_by_format:
|
||||||
|
return []
|
||||||
|
return self._download_json(
|
||||||
|
defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for definition in definitions:
|
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
|
||||||
if not isinstance(definition, dict):
|
for hls_url in traverse_obj(
|
||||||
continue
|
get_format_data('hls'),
|
||||||
video_url = url_or_none(definition.get('videoUrl'))
|
(lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
|
||||||
if not video_url:
|
(Ellipsis, 'videoUrl')):
|
||||||
continue
|
formats.extend(self._extract_m3u8_formats(
|
||||||
f = {
|
hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
|
||||||
'url': video_url,
|
entry_protocol='m3u8_native'))
|
||||||
'filesize': int_or_none(definition.get('videoSize')),
|
|
||||||
}
|
for f in traverse_obj(get_format_data('mp4'), (
|
||||||
height = int_or_none(definition.get('quality'))
|
lambda _, v: v.get('videoUrl'), {
|
||||||
|
'url': ('videoUrl', T(url_or_none)),
|
||||||
|
'filesize': ('videoSize', T(int_or_none)),
|
||||||
|
'height': ('quality', T(int_or_none)),
|
||||||
|
}, T(lambda x: x.get('videoUrl') and x))):
|
||||||
# Video URL's path looks like this:
|
# Video URL's path looks like this:
|
||||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||||
# We will benefit from it by extracting some metadata
|
# We will benefit from it by extracting some metadata
|
||||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
|
||||||
if mobj:
|
if mobj:
|
||||||
if not height:
|
if not f.get('height'):
|
||||||
height = int(mobj.group('height'))
|
f['height'] = int(mobj.group('height'))
|
||||||
bitrate = int(mobj.group('bitrate'))
|
f['tbr'] = int(mobj.group('bitrate'))
|
||||||
f.update({
|
f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
|
||||||
'format_id': '%dp-%dk' % (height, bitrate),
|
|
||||||
'tbr': bitrate,
|
|
||||||
})
|
|
||||||
f['height'] = height
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'http://www.youporn.com/watch/%s' % video_id, display_id,
|
|
||||||
headers={'Cookie': 'age_verified=1'})
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
|
||||||
webpage, 'title', default=None) or self._og_search_title(
|
webpage, 'title', default=None) or self._og_search_title(
|
||||||
@ -131,6 +196,8 @@ class YouPornIE(InfoExtractor):
|
|||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||||
|
duration = traverse_obj(playervars, ('duration', T(int_or_none)))
|
||||||
|
if duration is None:
|
||||||
duration = int_or_none(self._html_search_meta(
|
duration = int_or_none(self._html_search_meta(
|
||||||
'video:duration', webpage, 'duration', fatal=False))
|
'video:duration', webpage, 'duration', fatal=False))
|
||||||
|
|
||||||
@ -148,11 +215,11 @@ class YouPornIE(InfoExtractor):
|
|||||||
|
|
||||||
view_count = None
|
view_count = None
|
||||||
views = self._search_regex(
|
views = self._search_regex(
|
||||||
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
|
r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
|
||||||
'views', default=None)
|
webpage, 'views', default=None)
|
||||||
if views:
|
if views:
|
||||||
view_count = str_to_int(extract_attributes(views).get('data-value'))
|
view_count = parse_count(extract_attributes(views).get('data-value'))
|
||||||
comment_count = str_to_int(self._search_regex(
|
comment_count = parse_count(self._search_regex(
|
||||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||||
webpage, 'comment count', default=None))
|
webpage, 'comment count', default=None))
|
||||||
|
|
||||||
@ -168,7 +235,10 @@ class YouPornIE(InfoExtractor):
|
|||||||
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
|
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
|
||||||
'tags')
|
'tags')
|
||||||
|
|
||||||
return {
|
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
|
||||||
|
data.pop('url', None)
|
||||||
|
|
||||||
|
result = merge_dicts(data, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -183,4 +253,442 @@ class YouPornIE(InfoExtractor):
|
|||||||
'tags': tags,
|
'tags': tags,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
})
|
||||||
|
# Remove promotional non-description
|
||||||
|
if result.get('description', '').startswith(
|
||||||
|
'Watch %s online' % (result['title'],)):
|
||||||
|
del result['description']
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornListBase(InfoExtractor):
|
||||||
|
# pattern in '.title-text' element of page section containing videos
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
|
||||||
|
_PAGE_RETRY_COUNT = 0 # ie, no retry
|
||||||
|
_PAGE_RETRY_DELAY = 2 # seconds
|
||||||
|
|
||||||
|
def _get_next_url(self, url, pl_id, html):
|
||||||
|
return urljoin(url, self._search_regex(
|
||||||
|
r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
|
||||||
|
get_element_by_id('next', html) or '', 'next page',
|
||||||
|
group='url', default=None))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_title_from_slug(cls, title_slug):
|
||||||
|
return re.sub(r'[_-]', ' ', title_slug)
|
||||||
|
|
||||||
|
def _entries(self, url, pl_id, html=None, page_num=None):
|
||||||
|
|
||||||
|
# separates page sections
|
||||||
|
PLAYLIST_SECTION_RE = (
|
||||||
|
r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
|
||||||
|
)
|
||||||
|
# contains video link
|
||||||
|
VIDEO_URL_RE = r'''(?x)
|
||||||
|
<div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
|
||||||
|
(?:<div\b[\s\S]+?</div>\s*)*
|
||||||
|
<a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
|
||||||
|
'''
|
||||||
|
|
||||||
|
def yield_pages(url, html=html, page_num=page_num):
|
||||||
|
fatal = not html
|
||||||
|
for pnum in itertools.count(start=page_num or 1):
|
||||||
|
if not html:
|
||||||
|
html = self._download_webpage(
|
||||||
|
url, pl_id, note='Downloading page %d' % pnum,
|
||||||
|
fatal=fatal)
|
||||||
|
if not html:
|
||||||
|
break
|
||||||
|
fatal = False
|
||||||
|
yield (url, html, pnum)
|
||||||
|
# explicit page: extract just that page
|
||||||
|
if page_num is not None:
|
||||||
|
break
|
||||||
|
next_url = self._get_next_url(url, pl_id, html)
|
||||||
|
if not next_url or next_url == url:
|
||||||
|
break
|
||||||
|
url, html = next_url, None
|
||||||
|
|
||||||
|
def retry_page(msg, tries_left, page_data):
|
||||||
|
if tries_left <= 0:
|
||||||
|
return
|
||||||
|
self.report_warning(msg, pl_id)
|
||||||
|
sleep(self._PAGE_RETRY_DELAY)
|
||||||
|
return next(
|
||||||
|
yield_pages(page_data[0], page_num=page_data[2]), None)
|
||||||
|
|
||||||
|
def yield_entries(html):
|
||||||
|
for frag in re.split(PLAYLIST_SECTION_RE, html):
|
||||||
|
if not frag:
|
||||||
|
continue
|
||||||
|
t_text = get_element_by_class('title-text', frag or '')
|
||||||
|
if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
|
||||||
|
continue
|
||||||
|
for m in re.finditer(VIDEO_URL_RE, frag):
|
||||||
|
video_url = urljoin(url, m.group('url'))
|
||||||
|
if video_url:
|
||||||
|
yield self.url_result(video_url)
|
||||||
|
|
||||||
|
last_first_url = None
|
||||||
|
for page_data in yield_pages(url, html=html, page_num=page_num):
|
||||||
|
# page_data: url, html, page_num
|
||||||
|
first_url = None
|
||||||
|
tries_left = self._PAGE_RETRY_COUNT + 1
|
||||||
|
while tries_left > 0:
|
||||||
|
tries_left -= 1
|
||||||
|
for from_ in yield_entries(page_data[1]):
|
||||||
|
# may get the same page twice instead of empty page
|
||||||
|
# or (site bug) intead of actual next page
|
||||||
|
if not first_url:
|
||||||
|
first_url = from_['url']
|
||||||
|
if first_url == last_first_url:
|
||||||
|
# sometimes (/porntags/) the site serves the previous page
|
||||||
|
# instead but may provide the correct page after a delay
|
||||||
|
page_data = retry_page(
|
||||||
|
'Retrying duplicate page...', tries_left, page_data)
|
||||||
|
if page_data:
|
||||||
|
first_url = None
|
||||||
|
break
|
||||||
|
continue
|
||||||
|
yield from_
|
||||||
|
else:
|
||||||
|
if not first_url and 'no-result-paragarph1' in page_data[1]:
|
||||||
|
page_data = retry_page(
|
||||||
|
'Retrying empty page...', tries_left, page_data)
|
||||||
|
if page_data:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# success/failure
|
||||||
|
break
|
||||||
|
# may get an infinite (?) sequence of empty pages
|
||||||
|
if not first_url:
|
||||||
|
break
|
||||||
|
last_first_url = first_url
|
||||||
|
|
||||||
|
def _real_extract(self, url, html=None):
|
||||||
|
# exceptionally, id may be None
|
||||||
|
m_dict = self._match_valid_url(url).groupdict()
|
||||||
|
pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
|
||||||
|
|
||||||
|
qs = parse_qs(url)
|
||||||
|
for q, v in qs.items():
|
||||||
|
if v:
|
||||||
|
qs[q] = v[-1]
|
||||||
|
else:
|
||||||
|
del qs[q]
|
||||||
|
|
||||||
|
base_id = pl_id or 'YouPorn'
|
||||||
|
title = self._get_title_from_slug(base_id)
|
||||||
|
if page_type:
|
||||||
|
title = '%s %s' % (page_type.capitalize(), title)
|
||||||
|
base_id = [base_id.lower()]
|
||||||
|
if sort is None:
|
||||||
|
title += ' videos'
|
||||||
|
else:
|
||||||
|
title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
|
||||||
|
base_id.append(sort)
|
||||||
|
if qs:
|
||||||
|
ps = ['%s=%s' % item for item in sorted(qs.items())]
|
||||||
|
title += ' (%s)' % ','.join(ps)
|
||||||
|
base_id.extend(ps)
|
||||||
|
pl_id = '/'.join(base_id)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(url, pl_id, html=html,
|
||||||
|
page_num=int_or_none(qs.get('page'))),
|
||||||
|
playlist_id=pl_id, playlist_title=title)
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornCategoryIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>category)/(?P<id>[^/?#&]+)
|
||||||
|
(?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/category/lingerie/popular/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lingerie/popular',
|
||||||
|
'title': 'Category lingerie videos by popular',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 39,
|
||||||
|
}, {
|
||||||
|
'note': 'Filtered paginated list with single page result',
|
||||||
|
'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lingerie/duration/min_minutes=10',
|
||||||
|
'title': 'Category lingerie videos by duration (min_minutes=10)',
|
||||||
|
},
|
||||||
|
'playlist_maxcount': 30,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list',
|
||||||
|
'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'lingerie/popular/page=1',
|
||||||
|
'title': 'Category lingerie videos by popular (page=1)',
|
||||||
|
},
|
||||||
|
'playlist_count': 30,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornChannelIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn channel, with sorting and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>channel)/(?P<id>[^/?#&]+)
|
||||||
|
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/channel/x-feeds/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x-feeds',
|
||||||
|
'title': 'Channel X-Feeds videos',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 37,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list (no filters here)',
|
||||||
|
'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'x-feeds/duration/page=1',
|
||||||
|
'title': 'Channel X-Feeds videos by duration (page=1)',
|
||||||
|
},
|
||||||
|
'playlist_count': 24,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_title_from_slug(title_slug):
|
||||||
|
return re.sub(r'_', ' ', title_slug).title()
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornCollectionIE(YouPornListBase):
|
||||||
|
IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?youporn\.com/
|
||||||
|
(?P<type>collection)s/videos/(?P<id>\d+)
|
||||||
|
(?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
|
||||||
|
'''
|
||||||
|
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
|
||||||
|
_TESTS = [{
|
||||||
|
'note': 'Full list with pagination',
|
||||||
|
'url': 'https://www.youporn.com/collections/videos/33044251/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '33044251',
|
||||||
|
'title': 'Collection Sexy Lips videos',
|
||||||
|
'uploader': 'ph-littlewillyb',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 50,
|
||||||
|
}, {
|
||||||
|
'note': 'Single page of full list (no filters here)',
|
||||||
|
'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '33044251/time/page=1',
|
||||||
|
'title': 'Collection Sexy Lips videos by time (page=1)',
|
||||||
|
'uploader': 'ph-littlewillyb',
|
||||||
|
},
|
||||||
|
'playlist_count': 20,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
pl_id = self._match_id(url)
|
||||||
|
html = self._download_webpage(url, pl_id)
|
||||||
|
playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
|
||||||
|
infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
|
||||||
|
'collection-infos', html)) or '')
|
||||||
|
title, uploader = self._search_regex(
|
||||||
|
r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
|
||||||
|
infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
|
||||||
|
'uploader': uploader,
|
||||||
|
}, playlist) if title else playlist
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornTagIE(YouPornListBase):
    # Playlist extractor for /porntags/<tag>[/<sort>] listing pages.
    IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?youporn\.com/
        porn(?P<type>tag)s/(?P<id>[^/?#&]+)
        (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
    '''
    _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
    _PAGE_RETRY_COUNT = 1
    _TESTS = [{
        'note': 'Full list with pagination',
        'url': 'https://www.youporn.com/porntags/austrian',
        'info_dict': {
            'id': 'austrian',
            'title': 'Tag austrian videos',
        },
        'playlist_mincount': 35,
        'expected_warnings': ['Retrying duplicate page'],
    }, {
        'note': 'Filtered paginated list with single page result',
        'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
        'info_dict': {
            'id': 'austrian/duration/min_minutes=10',
            'title': 'Tag austrian videos by duration (min_minutes=10)',
        },
        # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
        # or more, varying with number of ads; let's set max as 9x4
        # NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
        'playlist_maxcount': 32,
        'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
    }, {
        'note': 'Single page of full list',
        'url': 'https://www.youporn.com/porntags/austrian/?page=1',
        'info_dict': {
            'id': 'austrian/page=1',
            'title': 'Tag austrian videos (page=1)',
        },
        'playlist_mincount': 32,
        'playlist_maxcount': 34,
        'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
    }]

    # YP tag navigation is broken, loses sort
    def _get_next_url(self, url, pl_id, html):
        """Return the next-page URL, re-inserting the sort segment if dropped.

        The next-page URL is obtained from the base class. If it matches
        _VALID_URL but has no sort segment while the current `url` has one,
        the current sort is inserted directly after the tag id. Any other
        result from the base class (including a falsy one) is returned
        unchanged.
        """
        next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
        if not next_url:
            return next_url

        next_match = self._match_valid_url(next_url)
        if not next_match or next_match.group('sort'):
            # unrecognised URL, or the sort survived: nothing to repair
            return next_url

        current_match = self._match_valid_url(url)
        current_sort = current_match.group('sort') if current_match else None
        if current_sort:
            # the sort group did not participate in next_match, so splice
            # after the end of the id group instead
            pos = next_match.end('id')
            next_url = next_url[:pos] + '/' + current_sort + next_url[pos:]
        return next_url
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornStarIE(YouPornListBase):
    # Playlist extractor for /pornstar/<name>[/<sort>] pages; also sets a
    # description taken from the page's info panel.
    IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?youporn\.com/
        (?P<type>pornstar)/(?P<id>[^/?#&]+)
        (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
    '''
    _PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
    _TESTS = [{
        'note': 'Full list with pagination',
        'url': 'https://www.youporn.com/pornstar/daynia/',
        'info_dict': {
            'id': 'daynia',
            'title': 'Pornstar Daynia videos',
            'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
        },
        'playlist_mincount': 45,
    }, {
        'note': 'Single page of full list (no filters here)',
        'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
        'info_dict': {
            'id': 'daynia/page=1',
            'title': 'Pornstar Daynia videos (page=1)',
            'description': 're:.{180,}',
        },
        'playlist_count': 26,
    }]

    @staticmethod
    def _get_title_from_slug(title_slug):
        """Turn a URL slug such as 'some_name' into 'Some Name'."""
        spaced = title_slug.replace('_', ' ')
        return spaced.title()

    def _real_extract(self, url):
        """Extract the playlist via the base class and add a description.

        The page is downloaded once here and handed to the base class
        through its `html` keyword.
        """
        pl_id = self._match_id(url)
        html = self._download_webpage(url, pl_id)
        playlist = super(YouPornStarIE, self)._real_extract(url, html=html)

        # contents of the <div class="... pornstar-info-wrapper ..."> element,
        # up to a run of at least six closing </div> tags
        info_element_re = r'''(?x)
            <div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
            (?P<info>[\s\S]+?)(?:</div>\s*){6,}
        '''

        infos = self._search_regex(
            info_element_re, html, 'infos', group='info', default='')
        if infos:
            # mark newlines with a token before clean_html(), then collapse
            # whitespace and finally turn each run of tokens into one space
            marked = infos.replace('\n', 'nl=nl')
            collapsed = re.sub(r'(?u)\s+', ' ', clean_html(marked))
            spaced = re.sub(r'(?:\s*nl=nl)+\s*', ' ', collapsed)
            infos = spaced.replace('ribe Subsc', '')

        description = infos.strip() or None
        return merge_dicts({'description': description}, playlist)
|
||||||
|
|
||||||
|
|
||||||
|
class YouPornVideosIE(YouPornListBase):
    # Playlist extractor for the site root, the named top-level listings
    # (recommended, top_rated, ...) and /browse/<sort> pages.
    IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?youporn\.com/
            (?:(?P<id>browse)/)?
            (?P<sort>(?(id)
                (?:duration|rating|time|views)|
                (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
            (?:[/#?]|$)
    '''
    _PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
    _TESTS = [{
        'note': 'Full list with pagination (too long for test)',
        'url': 'https://www.youporn.com/',
        'info_dict': {
            'id': 'youporn',
            'title': 'YouPorn videos',
        },
        'only_matching': True,
    }, {
        'note': 'Full list with pagination (too long for test)',
        'url': 'https://www.youporn.com/recommended',
        'info_dict': {
            'id': 'youporn/recommended',
            'title': 'YouPorn videos by recommended',
        },
        'only_matching': True,
    }, {
        'note': 'Full list with pagination (too long for test)',
        'url': 'https://www.youporn.com/top_rated',
        'info_dict': {
            'id': 'youporn/top_rated',
            'title': 'YouPorn videos by top rated',
        },
        'only_matching': True,
    }, {
        'note': 'Full list with pagination (too long for test)',
        'url': 'https://www.youporn.com/browse/time',
        'info_dict': {
            'id': 'browse/time',
            'title': 'YouPorn videos by time',
        },
        'only_matching': True,
    }, {
        'note': 'Filtered paginated list with single page result',
        'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
        'info_dict': {
            'id': 'youporn/most_favorited/max_minutes=2/res=VR',
            'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
        },
        'playlist_mincount': 10,
        'playlist_maxcount': 28,
    }, {
        'note': 'Filtered paginated list with several pages',
        'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
        'info_dict': {
            'id': 'youporn/most_favorited/max_minutes=5/res=VR',
            'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
        },
        'playlist_mincount': 45,
    }, {
        'note': 'Single page of full list',
        'url': 'https://www.youporn.com/browse/time?page=1',
        'info_dict': {
            'id': 'browse/time/page=1',
            'title': 'YouPorn videos by time (page=1)',
        },
        'playlist_count': 36,
    }]

    @staticmethod
    def _get_title_from_slug(title_slug):
        """Map the 'browse' slug to 'YouPorn'; return any other slug as is."""
        if title_slug == 'browse':
            return 'YouPorn'
        return title_slug
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import collections
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import os.path
|
import os.path
|
||||||
@ -19,21 +20,26 @@ from ..compat import (
|
|||||||
compat_urllib_parse_parse_qs as compat_parse_qs,
|
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_zip as zip,
|
||||||
)
|
)
|
||||||
from ..jsinterp import JSInterpreter
|
from ..jsinterp import JSInterpreter
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
clean_html,
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
LazyList,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
NO_DEFAULT,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
|
parse_count,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
@ -41,8 +47,11 @@ from ..utils import (
|
|||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
T,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
|
txt_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
@ -256,16 +265,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
cookies = self._get_cookies('https://www.youtube.com/')
|
cookies = self._get_cookies('https://www.youtube.com/')
|
||||||
if cookies.get('__Secure-3PSID'):
|
if cookies.get('__Secure-3PSID'):
|
||||||
return
|
return
|
||||||
consent_id = None
|
socs = cookies.get('SOCS')
|
||||||
consent = cookies.get('CONSENT')
|
if socs and not socs.value.startswith('CAA'): # not consented
|
||||||
if consent:
|
|
||||||
if 'YES' in consent.value:
|
|
||||||
return
|
return
|
||||||
consent_id = self._search_regex(
|
self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
|
||||||
r'PENDING\+(\d+)', consent.value, 'consent', default=None)
|
|
||||||
if not consent_id:
|
|
||||||
consent_id = random.randint(100, 999)
|
|
||||||
self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._initialize_consent()
|
self._initialize_consent()
|
||||||
@ -444,7 +447,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
extract_attributes(self._search_regex(
|
extract_attributes(self._search_regex(
|
||||||
r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
|
r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
|
||||||
% re.escape(var_name),
|
% re.escape(var_name),
|
||||||
get_element_by_attribute('itemprop', 'author', webpage) or '',
|
get_element_by_attribute('itemprop', 'author', webpage or '') or '',
|
||||||
'author link', default='')),
|
'author link', default='')),
|
||||||
paths[var_name][0])
|
paths[var_name][0])
|
||||||
|
|
||||||
@ -1249,7 +1252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'title': 'IMG 3456',
|
'title': 'IMG 3456',
|
||||||
'description': '',
|
'description': '',
|
||||||
'upload_date': '20170613',
|
'upload_date': '20170613',
|
||||||
'uploader': 'ElevageOrVert',
|
'uploader': "l'Or Vert asbl",
|
||||||
'uploader_id': '@ElevageOrVert',
|
'uploader_id': '@ElevageOrVert',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -1462,6 +1465,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
self._code_cache = {}
|
self._code_cache = {}
|
||||||
self._player_cache = {}
|
self._player_cache = {}
|
||||||
|
|
||||||
|
# *ytcfgs, webpage=None
|
||||||
|
def _extract_player_url(self, *ytcfgs, **kw_webpage):
    """Return the absolute player JS URL found in the ytcfgs or the webpage.

    Effective signature: (*ytcfgs, webpage=None).

    Positional arguments are ytcfg dicts. For compatibility with the older
    single-argument form, a first positional argument that is not a dict is
    taken as the webpage text when no `webpage` keyword is given.

    If webpage text is available, a PLAYER_JS_URL/jsUrl value found in it
    is appended to the candidates as a synthetic ytcfg. The first candidate
    that `urljoin` can resolve against https://www.youtube.com is returned.
    """
    # Bind webpage unconditionally: previously it was only assigned when
    # the first positional argument was not a dict, so calls such as
    # _extract_player_url(ytcfg) or _extract_player_url(ytcfg, webpage=...)
    # hit an unbound local in the test below.
    webpage = kw_webpage.get('webpage')
    if not webpage and ytcfgs and not isinstance(ytcfgs[0], dict):
        webpage = ytcfgs[0]

    if webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
        if player_url:
            ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)

    return traverse_obj(
        ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
        get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
|
||||||
|
|
||||||
|
def _download_player_url(self, video_id, fatal=False):
    """Build the player base.js URL from the version named in iframe_api.

    Downloads https://www.youtube.com/iframe_api and searches it for an
    8-hex-digit player version. Returns the formatted player URL, or None
    when no version was found.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)

    # with nothing downloaded, default=None is passed so the search simply
    # yields None; otherwise NO_DEFAULT leaves the outcome to `fatal`
    search_default = NO_DEFAULT if iframe_js else None
    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js or '', 'player version', fatal=fatal,
        default=search_default)

    if not version:
        return None
    return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(version)
|
||||||
|
|
||||||
def _signature_cache_id(self, example_sig):
|
def _signature_cache_id(self, example_sig):
|
||||||
""" Return a string representation of a signature """
|
""" Return a string representation of a signature """
|
||||||
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||||
@ -1476,46 +1503,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
return id_m.group('id')
|
return id_m.group('id')
|
||||||
|
|
||||||
def _get_player_code(self, video_id, player_url, player_id=None):
|
def _load_player(self, video_id, player_url, fatal=True, player_id=None):
|
||||||
if not player_id:
|
if not player_id:
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
if player_id not in self._code_cache:
|
if player_id not in self._code_cache:
|
||||||
self._code_cache[player_id] = self._download_webpage(
|
code = self._download_webpage(
|
||||||
player_url, video_id,
|
player_url, video_id, fatal=fatal,
|
||||||
note='Downloading player ' + player_id,
|
note='Downloading player ' + player_id,
|
||||||
errnote='Download of %s failed' % player_url)
|
errnote='Download of %s failed' % player_url)
|
||||||
return self._code_cache[player_id]
|
if code:
|
||||||
|
self._code_cache[player_id] = code
|
||||||
|
return self._code_cache[player_id] if fatal else self._code_cache.get(player_id)
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
# Read from filesystem cache
|
# Read from filesystem cache
|
||||||
func_id = 'js_%s_%s' % (
|
func_id = 'js_{0}_{1}'.format(
|
||||||
player_id, self._signature_cache_id(example_sig))
|
player_id, self._signature_cache_id(example_sig))
|
||||||
assert os.path.basename(func_id) == func_id
|
assert os.path.basename(func_id) == func_id
|
||||||
|
|
||||||
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
|
self.write_debug('Extracting signature function {0}'.format(func_id))
|
||||||
if cache_spec is not None:
|
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
|
||||||
|
|
||||||
|
if not cache_spec:
|
||||||
|
code = self._load_player(video_id, player_url, player_id)
|
||||||
|
if code:
|
||||||
|
res = self._parse_sig_js(code)
|
||||||
|
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||||
|
cache_spec = [ord(c) for c in res(test_string)]
|
||||||
|
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||||
|
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
|
||||||
code = self._get_player_code(video_id, player_url, player_id)
|
|
||||||
res = self._parse_sig_js(code)
|
|
||||||
|
|
||||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
|
||||||
cache_res = res(test_string)
|
|
||||||
cache_spec = [ord(c) for c in cache_res]
|
|
||||||
|
|
||||||
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
|
||||||
return res
|
|
||||||
|
|
||||||
def _print_sig_code(self, func, example_sig):
|
def _print_sig_code(self, func, example_sig):
|
||||||
|
if not self.get_param('youtube_print_sig_code'):
|
||||||
|
return
|
||||||
|
|
||||||
def gen_sig_code(idxs):
|
def gen_sig_code(idxs):
|
||||||
def _genslice(start, end, step):
|
def _genslice(start, end, step):
|
||||||
starts = '' if start == 0 else str(start)
|
starts = '' if start == 0 else str(start)
|
||||||
ends = (':%d' % (end + step)) if end + step >= 0 else ':'
|
ends = (':%d' % (end + step)) if end + step >= 0 else ':'
|
||||||
steps = '' if step == 1 else (':%d' % step)
|
steps = '' if step == 1 else (':%d' % step)
|
||||||
return 's[%s%s%s]' % (starts, ends, steps)
|
return 's[{0}{1}{2}]'.format(starts, ends, steps)
|
||||||
|
|
||||||
step = None
|
step = None
|
||||||
# Quelch pyflakes warnings - start will be set when step is set
|
# Quelch pyflakes warnings - start will be set when step is set
|
||||||
@ -1554,17 +1584,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
|
||||||
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
|
||||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
|
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
|
||||||
r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
|
||||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||||
# Obsolete patterns
|
# Obsolete patterns
|
||||||
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
||||||
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
|
||||||
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||||
jscode, 'Initial JS player signature function name', group='sig')
|
jscode, 'Initial JS player signature function name', group='sig')
|
||||||
|
|
||||||
@ -1572,131 +1599,163 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
initial_function = jsi.extract_function(funcname)
|
initial_function = jsi.extract_function(funcname)
|
||||||
return lambda s: initial_function([s])
|
return lambda s: initial_function([s])
|
||||||
|
|
||||||
|
def _cached(self, func, *cache_id):
|
||||||
|
def inner(*args, **kwargs):
|
||||||
|
if cache_id not in self._player_cache:
|
||||||
|
try:
|
||||||
|
self._player_cache[cache_id] = func(*args, **kwargs)
|
||||||
|
except ExtractorError as e:
|
||||||
|
self._player_cache[cache_id] = e
|
||||||
|
except Exception as e:
|
||||||
|
self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
|
||||||
|
|
||||||
|
ret = self._player_cache[cache_id]
|
||||||
|
if isinstance(ret, Exception):
|
||||||
|
raise ret
|
||||||
|
return ret
|
||||||
|
return inner
|
||||||
|
|
||||||
def _decrypt_signature(self, s, video_id, player_url):
|
def _decrypt_signature(self, s, video_id, player_url):
|
||||||
"""Turn the encrypted s field into a working signature"""
|
"""Turn the encrypted s field into a working signature"""
|
||||||
|
extract_sig = self._cached(
|
||||||
if player_url is None:
|
self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
|
||||||
raise ExtractorError('Cannot decrypt signature without player_url')
|
func = extract_sig(video_id, player_url, s)
|
||||||
|
|
||||||
try:
|
|
||||||
player_id = (player_url, self._signature_cache_id(s))
|
|
||||||
if player_id not in self._player_cache:
|
|
||||||
func = self._extract_signature_function(
|
|
||||||
video_id, player_url, s
|
|
||||||
)
|
|
||||||
self._player_cache[player_id] = func
|
|
||||||
func = self._player_cache[player_id]
|
|
||||||
if self._downloader.params.get('youtube_print_sig_code'):
|
|
||||||
self._print_sig_code(func, s)
|
self._print_sig_code(func, s)
|
||||||
return func(s)
|
return func(s)
|
||||||
except Exception as e:
|
|
||||||
tb = traceback.format_exc()
|
|
||||||
raise ExtractorError(
|
|
||||||
'Signature extraction failed: ' + tb, cause=e)
|
|
||||||
|
|
||||||
def _extract_player_url(self, webpage):
|
|
||||||
player_url = self._search_regex(
|
|
||||||
r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
|
|
||||||
webpage or '', 'player URL', fatal=False)
|
|
||||||
if not player_url:
|
|
||||||
return
|
|
||||||
if player_url.startswith('//'):
|
|
||||||
player_url = 'https:' + player_url
|
|
||||||
elif not re.match(r'https?://', player_url):
|
|
||||||
player_url = compat_urllib_parse.urljoin(
|
|
||||||
'https://www.youtube.com', player_url)
|
|
||||||
return player_url
|
|
||||||
|
|
||||||
# from yt-dlp
|
# from yt-dlp
|
||||||
# See also:
|
# See also:
|
||||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
|
||||||
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
|
||||||
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
|
||||||
def _extract_n_function_name(self, jscode):
|
def _decrypt_nsig(self, n, video_id, player_url):
|
||||||
target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?'
|
"""Turn the encrypted n field into a working signature"""
|
||||||
nfunc_and_idx = self._search_regex(
|
if player_url is None:
|
||||||
r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ),
|
raise ExtractorError('Cannot decrypt nsig without player_url')
|
||||||
jscode, 'Initial JS player n function name')
|
|
||||||
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
|
|
||||||
if not idx:
|
|
||||||
return nfunc
|
|
||||||
if int_or_none(idx) == 0:
|
|
||||||
real_nfunc = self._search_regex(
|
|
||||||
r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
|
|
||||||
'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals()))
|
|
||||||
if real_nfunc:
|
|
||||||
return real_nfunc
|
|
||||||
return self._parse_json(self._search_regex(
|
|
||||||
r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode,
|
|
||||||
'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
|
|
||||||
|
|
||||||
def _extract_n_function(self, video_id, player_url):
|
|
||||||
player_id = self._extract_player_info(player_url)
|
|
||||||
func_code = self._downloader.cache.load('youtube-nsig', player_id)
|
|
||||||
|
|
||||||
if func_code:
|
|
||||||
jsi = JSInterpreter(func_code)
|
|
||||||
else:
|
|
||||||
jscode = self._get_player_code(video_id, player_url, player_id)
|
|
||||||
funcname = self._extract_n_function_name(jscode)
|
|
||||||
jsi = JSInterpreter(jscode)
|
|
||||||
func_code = jsi.extract_function_code(funcname)
|
|
||||||
self._downloader.cache.store('youtube-nsig', player_id, func_code)
|
|
||||||
|
|
||||||
if self._downloader.params.get('youtube_print_sig_code'):
|
|
||||||
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(player_id, func_code[1]))
|
|
||||||
|
|
||||||
return lambda s: jsi.extract_function_from_code(*func_code)([s])
|
|
||||||
|
|
||||||
def _n_descramble(self, n_param, player_url, video_id):
|
|
||||||
"""Compute the response to YT's "n" parameter challenge,
|
|
||||||
or None
|
|
||||||
|
|
||||||
Args:
|
|
||||||
n_param -- challenge string that is the value of the
|
|
||||||
URL's "n" query parameter
|
|
||||||
player_url -- URL of YT player JS
|
|
||||||
video_id
|
|
||||||
"""
|
|
||||||
|
|
||||||
sig_id = ('nsig_value', n_param)
|
|
||||||
if sig_id in self._player_cache:
|
|
||||||
return self._player_cache[sig_id]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
player_id = ('nsig', player_url)
|
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
|
||||||
if player_id not in self._player_cache:
|
except ExtractorError as e:
|
||||||
self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
|
raise ExtractorError('Unable to extract nsig function code', cause=e)
|
||||||
func = self._player_cache[player_id]
|
if self.get_param('youtube_print_sig_code'):
|
||||||
ret = func(n_param)
|
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
|
||||||
if ret.startswith('enhanced_except_'):
|
player_id, func_code[1]))
|
||||||
raise ExtractorError('Unhandled exception in decode')
|
|
||||||
self._player_cache[sig_id] = ret
|
try:
|
||||||
if self._downloader.params.get('verbose', False):
|
extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
|
||||||
self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
|
ret = extract_nsig(jsi, func_code)(n)
|
||||||
return self._player_cache[sig_id]
|
except JSInterpreter.Exception as e:
|
||||||
except Exception as e:
|
self.report_warning(
|
||||||
self._downloader.report_warning(
|
'%s (%s %s)' % (
|
||||||
'[%s] %s (%s %s)' % (
|
'Unable to decode n-parameter: expect download to be blocked or throttled',
|
||||||
self.IE_NAME,
|
error_to_compat_str(e),
|
||||||
'Unable to decode n-parameter: download likely to be throttled',
|
traceback.format_exc()),
|
||||||
error_to_compat_str(e),
|
video_id=video_id)
|
||||||
traceback.format_exc()))
|
return
|
||||||
|
|
||||||
|
self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def _extract_n_function_name(self, jscode):
    """Return the name of the player's "n" parameter transform function.

    `jscode` is the player JS text. The call site of the n function is
    located with the main pattern; if that fails, a warning is reported
    and a generic search is made for a one-argument function whose body
    mentions "enhanced_except_". When the call site indexes an array
    (nfunc[idx]), the array literal is looked up and the element at `idx`
    is returned instead.
    """
    func_name, idx = self._search_regex(
        # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
        # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
        # or:  (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
        # or:  (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
        # old: (b=a.get("n"))&&(b=nfunc[idx](b)
        # older: (b=a.get("n"))&&(b=nfunc(b)
        r'''(?x)
            \((?:[\w$()\s]+,)*?\s*      # (
            (?P<b>[a-z])\s*=\s*         # b=
            (?:
                (?:                     # expect ,c=a.get(b) (etc)
                    String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
                    "n+"\[\s*\+?\s*[\w$.]+\s*]
                )\s*(?:,[\w$()\s]+(?=,))*|
                (?P<old>[\w$]+)         # a (old[er])
            )\s*
            (?(old)
                                        # b.get("n")
                (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
                (?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
                |                       # ,c=a.get(b)
                ,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
                (?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
                (?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
            )
                                        # interstitial junk
            \s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
            (?(c)(?P=c)|(?P=b))\s*=\s*  # [c|b]=
                                        # nfunc|nfunc[idx]
            (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
        ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
        default=(None, None))
    # thx bashonly: yt-dlp/yt-dlp/pull/10611
    if not func_name:
        self.report_warning('Falling back to generic n function search')
        return self._search_regex(
            r'''(?xs)
                (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
                (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
                \s*\{(?:(?!};).)+?["']enhanced_except_
            ''', jscode, 'Initial JS player n function name', group='name')
    if not idx:
        return func_name

    # nfunc is an array of functions: resolve the element actually called
    return self._parse_json(self._search_regex(
        r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
        'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
        func_name, transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
|
def _extract_n_function_code(self, video_id, player_url):
    """Return (JSInterpreter, player_id, func_code) for the n function.

    A truthy 'youtube-nsig' cache entry for this player is used directly.
    Otherwise the player JS is loaded, the n function's code is extracted
    from it and the result is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id)
    if cached_code:
        # on a cache hit the interpreter is built from the cached value
        return JSInterpreter(cached_code), player_id, cached_code

    player_js = self._load_player(video_id, player_url)
    interpreter = JSInterpreter(player_js)
    n_func_name = self._extract_n_function_name(player_js)
    extracted_code = interpreter.extract_function_code(n_func_name)

    self.cache.store('youtube-nsig', player_id, extracted_code)
    return interpreter, player_id, extracted_code
|
||||||
|
|
||||||
|
def _extract_n_function_from_code(self, jsi, func_code):
    """Return a callable mapping an n value to its transformed value.

    The JS function is built with jsi.extract_function_from_code and is
    called with the single-element list [s]. Any failure surfaces as
    JSInterpreter.Exception, including a result that starts with
    'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except Exception as err:
            if isinstance(err, JSInterpreter.Exception):
                # already the interpreter's own error type: pass it through
                raise
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
|
||||||
|
|
||||||
|
def _unthrottle_format_urls(self, video_id, player_url, *formats):
|
||||||
|
|
||||||
|
def decrypt_nsig(n):
|
||||||
|
return self._cached(self._decrypt_nsig, 'nsig', n, player_url)
|
||||||
|
|
||||||
def _unthrottle_format_urls(self, video_id, player_url, formats):
|
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
|
parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
|
||||||
n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
|
n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
|
||||||
if not n_param:
|
if not n_param:
|
||||||
continue
|
continue
|
||||||
n_param = n_param[-1]
|
n_param = n_param[-1]
|
||||||
n_response = self._n_descramble(n_param, player_url, video_id)
|
n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
|
||||||
if n_response is None:
|
if n_response is None:
|
||||||
# give up if descrambling failed
|
# give up if descrambling failed
|
||||||
break
|
break
|
||||||
for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict):
|
fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
|
||||||
fmt_dct['url'] = update_url(
|
|
||||||
fmt_dct['url'], query_update={'n': [n_response]})
|
|
||||||
|
|
||||||
# from yt-dlp, with tweaks
|
# from yt-dlp, with tweaks
|
||||||
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
|
||||||
@ -1704,16 +1763,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
Extract signatureTimestamp (sts)
|
Extract signatureTimestamp (sts)
|
||||||
Required to tell API what sig/player version is in use.
|
Required to tell API what sig/player version is in use.
|
||||||
"""
|
"""
|
||||||
sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
|
sts = traverse_obj(ytcfg, 'STS', expected_type=int)
|
||||||
if not sts:
|
if not sts:
|
||||||
# Attempt to extract from player
|
# Attempt to extract from player
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
error_msg = 'Cannot extract signature timestamp without player_url.'
|
error_msg = 'Cannot extract signature timestamp without player_url.'
|
||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(error_msg)
|
raise ExtractorError(error_msg)
|
||||||
self._downloader.report_warning(error_msg)
|
self.report_warning(error_msg)
|
||||||
return
|
return
|
||||||
code = self._get_player_code(video_id, player_url)
|
code = self._load_player(video_id, player_url, fatal=fatal)
|
||||||
sts = int_or_none(self._search_regex(
|
sts = int_or_none(self._search_regex(
|
||||||
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
|
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
|
||||||
'JS player signature timestamp', group='sts', fatal=fatal))
|
'JS player signature timestamp', group='sts', fatal=fatal))
|
||||||
@ -1729,12 +1788,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# cpn generation algorithm is reverse engineered from base.js.
|
# cpn generation algorithm is reverse engineered from base.js.
|
||||||
# In fact it works even with dummy cpn.
|
# In fact it works even with dummy cpn.
|
||||||
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
||||||
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
|
cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
|
||||||
|
|
||||||
playback_url = update_url(
|
# more consistent results setting it to right before the end
|
||||||
playback_url, query_update={
|
qs = parse_qs(playback_url)
|
||||||
'ver': ['2'],
|
video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1)
|
||||||
'cpn': [cpn],
|
|
||||||
|
playback_url = update_url_query(
|
||||||
|
playback_url, {
|
||||||
|
'ver': '2',
|
||||||
|
'cpn': cpn,
|
||||||
|
'cmt': video_length,
|
||||||
|
'el': 'detailpage', # otherwise defaults to "shorts"
|
||||||
})
|
})
|
||||||
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
@ -1982,97 +2047,162 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
|
||||||
|
|
||||||
|
if not player_url:
|
||||||
|
player_url = self._extract_player_url(webpage)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
itags = []
|
itags = collections.defaultdict(set)
|
||||||
itag_qualities = {}
|
itag_qualities = {}
|
||||||
q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
|
q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
|
||||||
|
CHUNK_SIZE = 10 << 20
|
||||||
|
|
||||||
streaming_data = player_response.get('streamingData') or {}
|
streaming_data = player_response.get('streamingData') or {}
|
||||||
streaming_formats = streaming_data.get('formats') or []
|
streaming_formats = streaming_data.get('formats') or []
|
||||||
streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
|
streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
|
||||||
|
|
||||||
|
def build_fragments(f):
|
||||||
|
return LazyList({
|
||||||
|
'url': update_url_query(f['url'], {
|
||||||
|
'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, f['filesize']))
|
||||||
|
})
|
||||||
|
} for range_start in range(0, f['filesize'], CHUNK_SIZE))
|
||||||
|
|
||||||
|
lower = lambda s: s.lower()
|
||||||
|
|
||||||
for fmt in streaming_formats:
|
for fmt in streaming_formats:
|
||||||
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
|
if fmt.get('targetDurationSec'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
itag = str_or_none(fmt.get('itag'))
|
itag = str_or_none(fmt.get('itag'))
|
||||||
quality = fmt.get('quality')
|
audio_track = traverse_obj(fmt, ('audioTrack', T(dict))) or {}
|
||||||
if itag and quality:
|
|
||||||
|
quality = traverse_obj(fmt, ((
|
||||||
|
# The 3gp format (17) in android client has a quality of "small",
|
||||||
|
# but is actually worse than other formats
|
||||||
|
T(lambda _: 'tiny' if itag == 17 else None),
|
||||||
|
('quality', T(lambda q: q if q and q != 'tiny' else None)),
|
||||||
|
('audioQuality', T(lower)),
|
||||||
|
'quality'), T(txt_or_none)), get_all=False)
|
||||||
|
if quality and itag:
|
||||||
itag_qualities[itag] = quality
|
itag_qualities[itag] = quality
|
||||||
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
|
||||||
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
|
# (adding `&sq=0` to the URL) and parsing emsg box to determine the
|
||||||
# number of fragment that would subsequently requested with (`&sq=N`)
|
# number of fragments that would subsequently be requested with (`&sq=N`)
|
||||||
if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
|
if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
|
||||||
continue
|
continue
|
||||||
|
|
||||||
fmt_url = fmt.get('url')
|
fmt_url = fmt.get('url')
|
||||||
if not fmt_url:
|
if not fmt_url:
|
||||||
sc = compat_parse_qs(fmt.get('signatureCipher'))
|
sc = compat_parse_qs(fmt.get('signatureCipher'))
|
||||||
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
fmt_url = traverse_obj(sc, ('url', -1, T(url_or_none)))
|
||||||
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
encrypted_sig = traverse_obj(sc, ('s', -1))
|
||||||
if not (sc and fmt_url and encrypted_sig):
|
if not (fmt_url and encrypted_sig):
|
||||||
continue
|
continue
|
||||||
if not player_url:
|
player_url = player_url or self._extract_player_url(webpage)
|
||||||
player_url = self._extract_player_url(webpage)
|
|
||||||
if not player_url:
|
if not player_url:
|
||||||
continue
|
continue
|
||||||
signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
|
try:
|
||||||
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
|
fmt_url = update_url_query(fmt_url, {
|
||||||
fmt_url += '&' + sp + '=' + signature
|
traverse_obj(sc, ('sp', -1)) or 'signature':
|
||||||
|
[self._decrypt_signature(encrypted_sig, video_id, player_url)],
|
||||||
|
})
|
||||||
|
except ExtractorError as e:
|
||||||
|
self.report_warning('Signature extraction failed: Some formats may be missing',
|
||||||
|
video_id=video_id, only_once=True)
|
||||||
|
self.write_debug(error_to_compat_str(e), only_once=True)
|
||||||
|
continue
|
||||||
|
|
||||||
if itag:
|
language_preference = (
|
||||||
itags.append(itag)
|
10 if audio_track.get('audioIsDefault')
|
||||||
tbr = float_or_none(
|
else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '')
|
||||||
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
|
else -1)
|
||||||
|
name = (
|
||||||
|
traverse_obj(fmt, ('qualityLabel', T(txt_or_none)))
|
||||||
|
or quality.replace('audio_quality_', ''))
|
||||||
dct = {
|
dct = {
|
||||||
'asr': int_or_none(fmt.get('audioSampleRate')),
|
'format_id': join_nonempty(itag, fmt.get('isDrc') and 'drc'),
|
||||||
'filesize': int_or_none(fmt.get('contentLength')),
|
|
||||||
'format_id': itag,
|
|
||||||
'format_note': fmt.get('qualityLabel') or quality,
|
|
||||||
'fps': int_or_none(fmt.get('fps')),
|
|
||||||
'height': int_or_none(fmt.get('height')),
|
|
||||||
'quality': q(quality),
|
|
||||||
'tbr': tbr,
|
|
||||||
'url': fmt_url,
|
'url': fmt_url,
|
||||||
'width': fmt.get('width'),
|
# Format 22 is likely to be damaged: see https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||||
|
'source_preference': ((-5 if itag == '22' else -1)
|
||||||
|
+ (100 if 'Premium' in name else 0)),
|
||||||
|
'quality': q(quality),
|
||||||
|
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||||
|
'desc' if language_preference < -1 else '') or None,
|
||||||
|
'language_preference': language_preference,
|
||||||
|
# Strictly de-prioritize 3gp formats
|
||||||
|
'preference': -2 if itag == '17' else None,
|
||||||
}
|
}
|
||||||
mimetype = fmt.get('mimeType')
|
if itag:
|
||||||
if mimetype:
|
itags[itag].add(('https', dct.get('language')))
|
||||||
mobj = re.match(
|
self._unthrottle_format_urls(video_id, player_url, dct)
|
||||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
|
dct.update(traverse_obj(fmt, {
|
||||||
if mobj:
|
'asr': ('audioSampleRate', T(int_or_none)),
|
||||||
dct['ext'] = mimetype2ext(mobj.group(1))
|
'filesize': ('contentLength', T(int_or_none)),
|
||||||
dct.update(parse_codecs(mobj.group(2)))
|
'format_note': ('qualityLabel', T(lambda x: x or quality)),
|
||||||
no_audio = dct.get('acodec') == 'none'
|
# for some formats, fps is wrongly returned as 1
|
||||||
no_video = dct.get('vcodec') == 'none'
|
'fps': ('fps', T(int_or_none), T(lambda f: f if f > 1 else None)),
|
||||||
if no_audio:
|
'audio_channels': ('audioChannels', T(int_or_none)),
|
||||||
dct['vbr'] = tbr
|
'height': ('height', T(int_or_none)),
|
||||||
if no_video:
|
'has_drm': ('drmFamilies', T(bool)),
|
||||||
dct['abr'] = tbr
|
'tbr': (('averageBitrate', 'bitrate'), T(lambda t: float_or_none(t, 1000))),
|
||||||
if no_audio or no_video:
|
'width': ('width', T(int_or_none)),
|
||||||
CHUNK_SIZE = 10 << 20
|
'_duration_ms': ('approxDurationMs', T(int_or_none)),
|
||||||
|
}, get_all=False))
|
||||||
|
mime_mobj = re.match(
|
||||||
|
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||||
|
if mime_mobj:
|
||||||
|
dct['ext'] = mimetype2ext(mime_mobj.group(1))
|
||||||
|
dct.update(parse_codecs(mime_mobj.group(2)))
|
||||||
|
single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
|
||||||
|
if single_stream and dct.get('ext'):
|
||||||
|
dct['container'] = dct['ext'] + '_dash'
|
||||||
|
if single_stream or itag == '17':
|
||||||
# avoid Youtube throttling
|
# avoid Youtube throttling
|
||||||
dct.update({
|
dct.update({
|
||||||
'protocol': 'http_dash_segments',
|
'protocol': 'http_dash_segments',
|
||||||
'fragments': [{
|
'fragments': build_fragments(dct),
|
||||||
'url': update_url_query(dct['url'], {
|
|
||||||
'range': '{0}-{1}'.format(range_start, min(range_start + CHUNK_SIZE - 1, dct['filesize']))
|
|
||||||
})
|
|
||||||
} for range_start in range(0, dct['filesize'], CHUNK_SIZE)]
|
|
||||||
} if dct['filesize'] else {
|
} if dct['filesize'] else {
|
||||||
'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful?
|
'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful?
|
||||||
})
|
})
|
||||||
|
|
||||||
if dct.get('ext'):
|
|
||||||
dct['container'] = dct['ext'] + '_dash'
|
|
||||||
formats.append(dct)
|
formats.append(dct)
|
||||||
|
|
||||||
|
def process_manifest_format(f, proto, client_name, itag, all_formats=False):
|
||||||
|
key = (proto, f.get('language'))
|
||||||
|
if not all_formats and key in itags[itag]:
|
||||||
|
return False
|
||||||
|
itags[itag].add(key)
|
||||||
|
|
||||||
|
if itag:
|
||||||
|
f['format_id'] = (
|
||||||
|
'{0}-{1}'.format(itag, proto)
|
||||||
|
if all_formats or any(p != proto for p, _ in itags[itag])
|
||||||
|
else itag)
|
||||||
|
|
||||||
|
if f.get('source_preference') is None:
|
||||||
|
f['source_preference'] = -1
|
||||||
|
|
||||||
|
if itag in ('616', '235'):
|
||||||
|
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
|
||||||
|
f['source_preference'] += 100
|
||||||
|
|
||||||
|
f['quality'] = q(traverse_obj(f, (
|
||||||
|
'format_id', T(lambda s: itag_qualities[s.split('-')[0]])), default=-1))
|
||||||
|
if try_call(lambda: f['fps'] <= 1):
|
||||||
|
del f['fps']
|
||||||
|
|
||||||
|
if proto == 'hls' and f.get('has_drm'):
|
||||||
|
f['has_drm'] = 'maybe'
|
||||||
|
f['source_preference'] -= 5
|
||||||
|
return True
|
||||||
|
|
||||||
hls_manifest_url = streaming_data.get('hlsManifestUrl')
|
hls_manifest_url = streaming_data.get('hlsManifestUrl')
|
||||||
if hls_manifest_url:
|
if hls_manifest_url:
|
||||||
for f in self._extract_m3u8_formats(
|
for f in self._extract_m3u8_formats(
|
||||||
hls_manifest_url, video_id, 'mp4', fatal=False):
|
hls_manifest_url, video_id, 'mp4', fatal=False):
|
||||||
itag = self._search_regex(
|
if process_manifest_format(
|
||||||
r'/itag/(\d+)', f['url'], 'itag', default=None)
|
f, 'hls', None, self._search_regex(
|
||||||
if itag:
|
r'/itag/(\d+)', f['url'], 'itag', default=None)):
|
||||||
f['format_id'] = itag
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
@ -2080,18 +2210,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if dash_manifest_url:
|
if dash_manifest_url:
|
||||||
for f in self._extract_mpd_formats(
|
for f in self._extract_mpd_formats(
|
||||||
dash_manifest_url, video_id, fatal=False):
|
dash_manifest_url, video_id, fatal=False):
|
||||||
itag = f['format_id']
|
if process_manifest_format(
|
||||||
if itag in itags:
|
f, 'dash', None, f['format_id']):
|
||||||
continue
|
f['filesize'] = traverse_obj(f, (
|
||||||
if itag in itag_qualities:
|
('fragment_base_url', 'url'), T(lambda u: self._search_regex(
|
||||||
f['quality'] = q(itag_qualities[itag])
|
r'/clen/(\d+)', u, 'file size', default=None)),
|
||||||
filesize = int_or_none(self._search_regex(
|
T(int_or_none)), get_all=False)
|
||||||
r'/clen/(\d+)', f.get('fragment_base_url')
|
|
||||||
or f['url'], 'file size', default=None))
|
|
||||||
if filesize:
|
|
||||||
f['filesize'] = filesize
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
|
playable_formats = [f for f in formats if not f.get('has_drm')]
|
||||||
|
if formats and not playable_formats:
|
||||||
|
# If there are no formats that definitely don't have DRM, all have DRM
|
||||||
|
self.report_drm(video_id)
|
||||||
|
formats[:] = playable_formats
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
if streaming_data.get('licenseInfos'):
|
if streaming_data.get('licenseInfos'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
@ -2162,6 +2294,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
video_details.get('lengthSeconds')
|
video_details.get('lengthSeconds')
|
||||||
or microformat.get('lengthSeconds')) \
|
or microformat.get('lengthSeconds')) \
|
||||||
or parse_duration(search_meta('duration'))
|
or parse_duration(search_meta('duration'))
|
||||||
|
|
||||||
|
for f in formats:
|
||||||
|
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||||||
|
# but avoid false positives with small duration differences.
|
||||||
|
# Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||||
|
if try_call(lambda x: float(x.pop('_duration_ms')) / duration < 500, args=(f,)):
|
||||||
|
self.report_warning(
|
||||||
|
'{0}: Some possibly damaged formats will be deprioritized'.format(video_id), only_once=True)
|
||||||
|
# Strictly de-prioritize damaged formats
|
||||||
|
f['preference'] = -10
|
||||||
|
|
||||||
is_live = video_details.get('isLive')
|
is_live = video_details.get('isLive')
|
||||||
|
|
||||||
owner_profile_url = self._yt_urljoin(self._extract_author_var(
|
owner_profile_url = self._yt_urljoin(self._extract_author_var(
|
||||||
@ -2170,10 +2313,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
uploader = self._extract_author_var(
|
uploader = self._extract_author_var(
|
||||||
webpage, 'name', videodetails=video_details, metadata=microformat)
|
webpage, 'name', videodetails=video_details, metadata=microformat)
|
||||||
|
|
||||||
if not player_url:
|
|
||||||
player_url = self._extract_player_url(webpage)
|
|
||||||
self._unthrottle_format_urls(video_id, player_url, formats)
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(video_title) if is_live else video_title,
|
'title': self._live_title(video_title) if is_live else video_title,
|
||||||
@ -2366,6 +2505,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'like_count': str_to_int(like_count),
|
'like_count': str_to_int(like_count),
|
||||||
'dislike_count': str_to_int(dislike_count),
|
'dislike_count': str_to_int(dislike_count),
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
|
info['like_count'] = traverse_obj(vpir, (
|
||||||
|
'videoActions', 'menuRenderer', 'topLevelButtons', Ellipsis,
|
||||||
|
'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
|
||||||
|
'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
|
||||||
|
'buttonViewModel', (('title', ('accessibilityText', T(lambda s: s.split()), Ellipsis))), T(parse_count)),
|
||||||
|
get_all=False)
|
||||||
|
|
||||||
vsir = content.get('videoSecondaryInfoRenderer')
|
vsir = content.get('videoSecondaryInfoRenderer')
|
||||||
if vsir:
|
if vsir:
|
||||||
rows = try_get(
|
rows = try_get(
|
||||||
@ -2480,7 +2627,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'playlist_mincount': 94,
|
'playlist_mincount': 94,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'title': 'Igor Kleiner - Playlists',
|
'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
|
||||||
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
|
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
|
||||||
'uploader': 'Igor Kleiner',
|
'uploader': 'Igor Kleiner',
|
||||||
'uploader_id': '@IgorDataScience',
|
'uploader_id': '@IgorDataScience',
|
||||||
@ -2491,7 +2638,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'playlist_mincount': 94,
|
'playlist_mincount': 94,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||||
'title': 'Igor Kleiner - Playlists',
|
'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
|
||||||
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
|
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
|
||||||
'uploader': 'Igor Kleiner',
|
'uploader': 'Igor Kleiner',
|
||||||
'uploader_id': '@IgorDataScience',
|
'uploader_id': '@IgorDataScience',
|
||||||
@ -2603,12 +2750,23 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
|
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||||
'title': 'lex will - Channels',
|
'title': r're:lex will - (?:Home|Channels)',
|
||||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||||
'uploader': 'lex will',
|
'uploader': 'lex will',
|
||||||
'uploader_id': '@lexwill718',
|
'uploader_id': '@lexwill718',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 75,
|
'playlist_mincount': 75,
|
||||||
|
}, {
|
||||||
|
# Releases tab
|
||||||
|
'url': 'https://www.youtube.com/@daftpunk/releases',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
|
||||||
|
'title': 'Daft Punk - Releases',
|
||||||
|
'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
|
||||||
|
'uploader_id': '@daftpunk',
|
||||||
|
'uploader': 'Daft Punk',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 36,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -2823,6 +2981,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
continue
|
continue
|
||||||
return renderer
|
return renderer
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_text(r, k):
|
||||||
|
return traverse_obj(
|
||||||
|
r, (k, 'runs', 0, 'text'), (k, 'simpleText'),
|
||||||
|
expected_type=txt_or_none)
|
||||||
|
|
||||||
def _grid_entries(self, grid_renderer):
|
def _grid_entries(self, grid_renderer):
|
||||||
for item in grid_renderer['items']:
|
for item in grid_renderer['items']:
|
||||||
if not isinstance(item, dict):
|
if not isinstance(item, dict):
|
||||||
@ -2830,9 +2994,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
renderer = self._extract_grid_item_renderer(item)
|
renderer = self._extract_grid_item_renderer(item)
|
||||||
if not isinstance(renderer, dict):
|
if not isinstance(renderer, dict):
|
||||||
continue
|
continue
|
||||||
title = try_get(
|
title = self._get_text(renderer, 'title')
|
||||||
renderer, (lambda x: x['title']['runs'][0]['text'],
|
|
||||||
lambda x: x['title']['simpleText']), compat_str)
|
|
||||||
# playlist
|
# playlist
|
||||||
playlist_id = renderer.get('playlistId')
|
playlist_id = renderer.get('playlistId')
|
||||||
if playlist_id:
|
if playlist_id:
|
||||||
@ -2849,8 +3011,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
# channel
|
# channel
|
||||||
channel_id = renderer.get('channelId')
|
channel_id = renderer.get('channelId')
|
||||||
if channel_id:
|
if channel_id:
|
||||||
title = try_get(
|
title = self._get_text(renderer, 'title')
|
||||||
renderer, lambda x: x['title']['simpleText'], compat_str)
|
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
'https://www.youtube.com/channel/%s' % channel_id,
|
'https://www.youtube.com/channel/%s' % channel_id,
|
||||||
ie=YoutubeTabIE.ie_key(), video_title=title)
|
ie=YoutubeTabIE.ie_key(), video_title=title)
|
||||||
@ -2959,15 +3120,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _rich_grid_entries(self, contents):
|
def _rich_grid_entries(self, contents):
|
||||||
for content in contents:
|
for content in contents:
|
||||||
video_renderer = try_get(
|
content = traverse_obj(
|
||||||
content,
|
content, ('richItemRenderer', 'content'),
|
||||||
(lambda x: x['richItemRenderer']['content']['videoRenderer'],
|
expected_type=dict) or {}
|
||||||
lambda x: x['richItemRenderer']['content']['reelItemRenderer']),
|
video_renderer = traverse_obj(
|
||||||
dict)
|
content, 'videoRenderer', 'reelItemRenderer',
|
||||||
|
expected_type=dict)
|
||||||
if video_renderer:
|
if video_renderer:
|
||||||
entry = self._video_entry(video_renderer)
|
entry = self._video_entry(video_renderer)
|
||||||
if entry:
|
if entry:
|
||||||
yield entry
|
yield entry
|
||||||
|
# playlist
|
||||||
|
renderer = traverse_obj(
|
||||||
|
content, 'playlistRenderer', expected_type=dict) or {}
|
||||||
|
title = self._get_text(renderer, 'title')
|
||||||
|
playlist_id = renderer.get('playlistId')
|
||||||
|
if playlist_id:
|
||||||
|
yield self.url_result(
|
||||||
|
'https://www.youtube.com/playlist?list=%s' % playlist_id,
|
||||||
|
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||||
|
video_title=title)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_continuation_query(continuation, ctp=None):
|
def _build_continuation_query(continuation, ctp=None):
|
||||||
@ -3072,6 +3244,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
return
|
return
|
||||||
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
continuation = self._extract_continuation(rich_grid_renderer)
|
continuation = self._extract_continuation(rich_grid_renderer)
|
||||||
|
|
||||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||||
@ -3214,50 +3387,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
uploader['channel'] = uploader['uploader']
|
uploader['channel'] = uploader['uploader']
|
||||||
return uploader
|
return uploader
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def _extract_alert(data):
|
def _extract_alert(cls, data):
|
||||||
alerts = []
|
alerts = []
|
||||||
for alert in try_get(data, lambda x: x['alerts'], list) or []:
|
for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
|
||||||
if not isinstance(alert, dict):
|
alert_text = traverse_obj(
|
||||||
continue
|
alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
|
||||||
alert_text = try_get(
|
|
||||||
alert, lambda x: x['alertRenderer']['text'], dict)
|
|
||||||
if not alert_text:
|
if not alert_text:
|
||||||
continue
|
continue
|
||||||
text = try_get(
|
text = cls._get_text(alert_text, 'text')
|
||||||
alert_text,
|
|
||||||
(lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
|
|
||||||
compat_str)
|
|
||||||
if text:
|
if text:
|
||||||
alerts.append(text)
|
alerts.append(text)
|
||||||
return '\n'.join(alerts)
|
return '\n'.join(alerts)
|
||||||
|
|
||||||
def _extract_from_tabs(self, item_id, webpage, data, tabs):
|
def _extract_from_tabs(self, item_id, webpage, data, tabs):
|
||||||
selected_tab = self._extract_selected_tab(tabs)
|
selected_tab = self._extract_selected_tab(tabs)
|
||||||
renderer = try_get(
|
renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'),
|
||||||
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
expected_type=dict) or {}
|
||||||
playlist_id = item_id
|
playlist_id = item_id
|
||||||
title = description = None
|
title = description = None
|
||||||
if renderer:
|
if renderer:
|
||||||
channel_title = renderer.get('title') or item_id
|
channel_title = txt_or_none(renderer.get('title')) or item_id
|
||||||
tab_title = selected_tab.get('title')
|
tab_title = txt_or_none(selected_tab.get('title'))
|
||||||
title = channel_title or item_id
|
title = join_nonempty(
|
||||||
if tab_title:
|
channel_title or item_id, tab_title,
|
||||||
title += ' - %s' % tab_title
|
txt_or_none(selected_tab.get('expandedText')),
|
||||||
if selected_tab.get('expandedText'):
|
delim=' - ')
|
||||||
title += ' - %s' % selected_tab['expandedText']
|
description = txt_or_none(renderer.get('description'))
|
||||||
description = renderer.get('description')
|
playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id
|
||||||
playlist_id = renderer.get('externalId')
|
|
||||||
else:
|
else:
|
||||||
renderer = try_get(
|
renderer = traverse_obj(data,
|
||||||
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
|
('metadata', 'playlistMetadataRenderer'),
|
||||||
if renderer:
|
('header', 'hashtagHeaderRenderer'),
|
||||||
title = renderer.get('title')
|
expected_type=dict) or {}
|
||||||
else:
|
title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'),
|
||||||
renderer = try_get(
|
expected_type=txt_or_none)
|
||||||
data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
|
|
||||||
if renderer:
|
|
||||||
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
|
|
||||||
playlist = self.playlist_result(
|
playlist = self.playlist_result(
|
||||||
self._entries(selected_tab, item_id, webpage),
|
self._entries(selected_tab, item_id, webpage),
|
||||||
playlist_id=playlist_id, playlist_title=title,
|
playlist_id=playlist_id, playlist_title=title,
|
||||||
@ -3265,15 +3429,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
return merge_dicts(playlist, self._extract_uploader(renderer, data))
|
return merge_dicts(playlist, self._extract_uploader(renderer, data))
|
||||||
|
|
||||||
def _extract_from_playlist(self, item_id, url, data, playlist):
|
def _extract_from_playlist(self, item_id, url, data, playlist):
|
||||||
title = playlist.get('title') or try_get(
|
title = traverse_obj((playlist, data),
|
||||||
data, lambda x: x['titleText']['simpleText'], compat_str)
|
(0, 'title'), (1, 'titleText', 'simpleText'),
|
||||||
playlist_id = playlist.get('playlistId') or item_id
|
expected_type=txt_or_none)
|
||||||
|
playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
|
||||||
# Inline playlist rendition continuation does not always work
|
# Inline playlist rendition continuation does not always work
|
||||||
# at Youtube side, so delegating regular tab-based playlist URL
|
# at Youtube side, so delegating regular tab-based playlist URL
|
||||||
# processing whenever possible.
|
# processing whenever possible.
|
||||||
playlist_url = urljoin(url, try_get(
|
playlist_url = urljoin(url, traverse_obj(
|
||||||
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
|
||||||
compat_str))
|
expected_type=url_or_none))
|
||||||
if playlist_url and playlist_url != url:
|
if playlist_url and playlist_url != url:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||||
|
@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import math
|
|
||||||
import operator
|
import operator
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from functools import update_wrapper
|
||||||
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -13,21 +14,60 @@ from .utils import (
|
|||||||
remove_quotes,
|
remove_quotes,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
variadic,
|
variadic,
|
||||||
|
write_string,
|
||||||
)
|
)
|
||||||
from .compat import (
|
from .compat import (
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_collections_chain_map as ChainMap,
|
compat_collections_chain_map as ChainMap,
|
||||||
|
compat_filter as filter,
|
||||||
compat_itertools_zip_longest as zip_longest,
|
compat_itertools_zip_longest as zip_longest,
|
||||||
|
compat_map as map,
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# name JS functions
|
||||||
|
class function_with_repr(object):
|
||||||
|
# from yt_dlp/utils.py, but in this module
|
||||||
|
# repr_ is always set
|
||||||
|
def __init__(self, func, repr_):
|
||||||
|
update_wrapper(self, func)
|
||||||
|
self.func, self.__repr = func, repr_
|
||||||
|
|
||||||
|
def __call__(self, *args, **kwargs):
|
||||||
|
return self.func(*args, **kwargs)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self.__repr
|
||||||
|
|
||||||
|
|
||||||
|
# name JS operators
|
||||||
|
def wraps_op(op):
|
||||||
|
|
||||||
|
def update_and_rename_wrapper(w):
|
||||||
|
f = update_wrapper(w, op)
|
||||||
|
# fn names are str in both Py 2/3
|
||||||
|
f.__name__ = str('JS_') + f.__name__
|
||||||
|
return f
|
||||||
|
|
||||||
|
return update_and_rename_wrapper
|
||||||
|
|
||||||
|
|
||||||
|
# NB In principle NaN cannot be checked by membership.
|
||||||
|
# Here all NaN values are actually this one, so _NaN is _NaN,
|
||||||
|
# although _NaN != _NaN. Ditto Infinity.
|
||||||
|
|
||||||
|
_NaN = float('nan')
|
||||||
|
_Infinity = float('inf')
|
||||||
|
|
||||||
|
|
||||||
def _js_bit_op(op):
|
def _js_bit_op(op):
|
||||||
|
|
||||||
def zeroise(x):
|
def zeroise(x):
|
||||||
return 0 if x in (None, JS_Undefined) else x
|
return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
return op(zeroise(a), zeroise(b)) & 0xffffffff
|
return op(zeroise(a), zeroise(b)) & 0xffffffff
|
||||||
|
|
||||||
@ -36,23 +76,24 @@ def _js_bit_op(op):
|
|||||||
|
|
||||||
def _js_arith_op(op):
|
def _js_arith_op(op):
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
if JS_Undefined in (a, b):
|
if JS_Undefined in (a, b):
|
||||||
return float('nan')
|
return _NaN
|
||||||
return op(a or 0, b or 0)
|
return op(a or 0, b or 0)
|
||||||
|
|
||||||
return wrapped
|
return wrapped
|
||||||
|
|
||||||
|
|
||||||
def _js_div(a, b):
|
def _js_div(a, b):
|
||||||
if JS_Undefined in (a, b) or not (a and b):
|
if JS_Undefined in (a, b) or not (a or b):
|
||||||
return float('nan')
|
return _NaN
|
||||||
return operator.truediv(a or 0, b) if b else float('inf')
|
return operator.truediv(a or 0, b) if b else _Infinity
|
||||||
|
|
||||||
|
|
||||||
def _js_mod(a, b):
|
def _js_mod(a, b):
|
||||||
if JS_Undefined in (a, b) or not b:
|
if JS_Undefined in (a, b) or not b:
|
||||||
return float('nan')
|
return _NaN
|
||||||
return (a or 0) % b
|
return (a or 0) % b
|
||||||
|
|
||||||
|
|
||||||
@ -60,12 +101,13 @@ def _js_exp(a, b):
|
|||||||
if not b:
|
if not b:
|
||||||
return 1 # even 0 ** 0 !!
|
return 1 # even 0 ** 0 !!
|
||||||
elif JS_Undefined in (a, b):
|
elif JS_Undefined in (a, b):
|
||||||
return float('nan')
|
return _NaN
|
||||||
return (a or 0) ** b
|
return (a or 0) ** b
|
||||||
|
|
||||||
|
|
||||||
def _js_eq_op(op):
|
def _js_eq_op(op):
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
if set((a, b)) <= set((None, JS_Undefined)):
|
if set((a, b)) <= set((None, JS_Undefined)):
|
||||||
return op(a, a)
|
return op(a, a)
|
||||||
@ -76,6 +118,7 @@ def _js_eq_op(op):
|
|||||||
|
|
||||||
def _js_comp_op(op):
|
def _js_comp_op(op):
|
||||||
|
|
||||||
|
@wraps_op(op)
|
||||||
def wrapped(a, b):
|
def wrapped(a, b):
|
||||||
if JS_Undefined in (a, b):
|
if JS_Undefined in (a, b):
|
||||||
return False
|
return False
|
||||||
@ -90,13 +133,8 @@ def _js_comp_op(op):
|
|||||||
|
|
||||||
def _js_ternary(cndn, if_true=True, if_false=False):
|
def _js_ternary(cndn, if_true=True, if_false=False):
|
||||||
"""Simulate JS's ternary operator (cndn?if_true:if_false)"""
|
"""Simulate JS's ternary operator (cndn?if_true:if_false)"""
|
||||||
if cndn in (False, None, 0, '', JS_Undefined):
|
if cndn in (False, None, 0, '', JS_Undefined, _NaN):
|
||||||
return if_false
|
return if_false
|
||||||
try:
|
|
||||||
if math.isnan(cndn): # NB: NaN cannot be checked by membership
|
|
||||||
return if_false
|
|
||||||
except TypeError:
|
|
||||||
pass
|
|
||||||
return if_true
|
return if_true
|
||||||
|
|
||||||
|
|
||||||
@ -186,6 +224,42 @@ class LocalNameSpace(ChainMap):
|
|||||||
return 'LocalNameSpace%s' % (self.maps, )
|
return 'LocalNameSpace%s' % (self.maps, )
|
||||||
|
|
||||||
|
|
||||||
|
class Debugger(object):
|
||||||
|
ENABLED = False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def write(*args, **kwargs):
|
||||||
|
level = kwargs.get('level', 100)
|
||||||
|
|
||||||
|
def truncate_string(s, left, right=0):
|
||||||
|
if s is None or len(s) <= left + right:
|
||||||
|
return s
|
||||||
|
return '...'.join((s[:left - 3], s[-right:] if right else ''))
|
||||||
|
|
||||||
|
write_string('[debug] JS: {0}{1}\n'.format(
|
||||||
|
' ' * (100 - level),
|
||||||
|
' '.join(truncate_string(compat_str(x), 50, 50) for x in args)))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def wrap_interpreter(cls, f):
|
||||||
|
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||||
|
if cls.ENABLED and stmt.strip():
|
||||||
|
cls.write(stmt, level=allow_recursion)
|
||||||
|
try:
|
||||||
|
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
if cls.ENABLED:
|
||||||
|
if isinstance(e, ExtractorError):
|
||||||
|
e = e.orig_msg
|
||||||
|
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
|
||||||
|
raise
|
||||||
|
if cls.ENABLED and stmt.strip():
|
||||||
|
if should_ret or repr(ret) != stmt:
|
||||||
|
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
|
||||||
|
return ret, should_ret
|
||||||
|
return interpret_statement
|
||||||
|
|
||||||
|
|
||||||
class JSInterpreter(object):
|
class JSInterpreter(object):
|
||||||
__named_object_counter = 0
|
__named_object_counter = 0
|
||||||
|
|
||||||
@ -243,9 +317,20 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
self.__instantiate()
|
self.__instantiate()
|
||||||
if hasattr(self, name):
|
# make Py 2.6 conform to its lying documentation
|
||||||
return getattr(self, name)
|
if name == 'flags':
|
||||||
return super(JSInterpreter.JS_RegExp, self).__getattr__(name)
|
self.flags = self.__flags
|
||||||
|
return self.flags
|
||||||
|
elif name == 'pattern':
|
||||||
|
self.pattern = self.__pattern_txt
|
||||||
|
return self.pattern
|
||||||
|
elif hasattr(self.__self, name):
|
||||||
|
v = getattr(self.__self, name)
|
||||||
|
setattr(self, name, v)
|
||||||
|
return v
|
||||||
|
elif name in ('groupindex', 'groups'):
|
||||||
|
return 0 if name == 'groupindex' else {}
|
||||||
|
raise AttributeError('{0} has no attribute named {1}'.format(self, name))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def regex_flags(cls, expr):
|
def regex_flags(cls, expr):
|
||||||
@ -262,13 +347,14 @@ class JSInterpreter(object):
|
|||||||
def __op_chars(cls):
|
def __op_chars(cls):
|
||||||
op_chars = set(';,[')
|
op_chars = set(';,[')
|
||||||
for op in cls._all_operators():
|
for op in cls._all_operators():
|
||||||
for c in op[0]:
|
op_chars.update(op[0])
|
||||||
op_chars.add(c)
|
|
||||||
return op_chars
|
return op_chars
|
||||||
|
|
||||||
def _named_object(self, namespace, obj):
|
def _named_object(self, namespace, obj):
|
||||||
self.__named_object_counter += 1
|
self.__named_object_counter += 1
|
||||||
name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
|
name = '%s%d' % (self._OBJ_NAME, self.__named_object_counter)
|
||||||
|
if callable(obj) and not isinstance(obj, function_with_repr):
|
||||||
|
obj = function_with_repr(obj, 'F<%s>' % (self.__named_object_counter, ))
|
||||||
namespace[name] = obj
|
namespace[name] = obj
|
||||||
return name
|
return name
|
||||||
|
|
||||||
@ -279,9 +365,10 @@ class JSInterpreter(object):
|
|||||||
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
# collections.Counter() is ~10% slower in both 2.7 and 3.9
|
||||||
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
counters = dict((k, 0) for k in _MATCHING_PARENS.values())
|
||||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||||
in_quote, escaping, skipping = None, False, 0
|
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
||||||
after_op, in_regex_char_group = True, False
|
skipping = 0
|
||||||
|
if skip_delims:
|
||||||
|
skip_delims = variadic(skip_delims)
|
||||||
for idx, char in enumerate(expr):
|
for idx, char in enumerate(expr):
|
||||||
paren_delta = 0
|
paren_delta = 0
|
||||||
if not in_quote:
|
if not in_quote:
|
||||||
@ -308,7 +395,7 @@ class JSInterpreter(object):
|
|||||||
continue
|
continue
|
||||||
elif pos == 0 and skip_delims:
|
elif pos == 0 and skip_delims:
|
||||||
here = expr[idx:]
|
here = expr[idx:]
|
||||||
for s in variadic(skip_delims):
|
for s in skip_delims:
|
||||||
if here.startswith(s) and s:
|
if here.startswith(s) and s:
|
||||||
skipping = len(s) - 1
|
skipping = len(s) - 1
|
||||||
break
|
break
|
||||||
@ -329,16 +416,17 @@ class JSInterpreter(object):
|
|||||||
if delim is None:
|
if delim is None:
|
||||||
delim = expr and _MATCHING_PARENS[expr[0]]
|
delim = expr and _MATCHING_PARENS[expr[0]]
|
||||||
separated = list(cls._separate(expr, delim, 1))
|
separated = list(cls._separate(expr, delim, 1))
|
||||||
|
|
||||||
if len(separated) < 2:
|
if len(separated) < 2:
|
||||||
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
|
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
|
||||||
return separated[0][1:].strip(), separated[1].strip()
|
return separated[0][1:].strip(), separated[1].strip()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _all_operators():
|
def _all_operators(_cached=[]):
|
||||||
return itertools.chain(
|
if not _cached:
|
||||||
|
_cached.extend(itertools.chain(
|
||||||
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
|
||||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
|
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
|
||||||
|
return _cached
|
||||||
|
|
||||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||||
if op in ('||', '&&'):
|
if op in ('||', '&&'):
|
||||||
@ -356,6 +444,7 @@ class JSInterpreter(object):
|
|||||||
return right_val
|
return right_val
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# print('Eval:', opfunc.__name__, left_val, right_val)
|
||||||
return opfunc(left_val, right_val)
|
return opfunc(left_val, right_val)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
|
||||||
@ -368,7 +457,7 @@ class JSInterpreter(object):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
if allow_undefined:
|
if allow_undefined:
|
||||||
return JS_Undefined
|
return JS_Undefined
|
||||||
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
|
||||||
|
|
||||||
def _dump(self, obj, namespace):
|
def _dump(self, obj, namespace):
|
||||||
try:
|
try:
|
||||||
@ -390,15 +479,18 @@ class JSInterpreter(object):
|
|||||||
_FINALLY_RE = re.compile(r'finally\s*\{')
|
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||||
_SWITCH_RE = re.compile(r'switch\s*\(')
|
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||||
|
|
||||||
|
@Debugger.wrap_interpreter
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
if allow_recursion < 0:
|
if allow_recursion < 0:
|
||||||
raise self.Exception('Recursion limit reached')
|
raise self.Exception('Recursion limit reached')
|
||||||
allow_recursion -= 1
|
allow_recursion -= 1
|
||||||
|
|
||||||
|
# print('At: ' + stmt[:60])
|
||||||
should_return = False
|
should_return = False
|
||||||
# fails on (eg) if (...) stmt1; else stmt2;
|
# fails on (eg) if (...) stmt1; else stmt2;
|
||||||
sub_statements = list(self._separate(stmt, ';')) or ['']
|
sub_statements = list(self._separate(stmt, ';')) or ['']
|
||||||
expr = stmt = sub_statements.pop().strip()
|
expr = stmt = sub_statements.pop().strip()
|
||||||
|
|
||||||
for sub_stmt in sub_statements:
|
for sub_stmt in sub_statements:
|
||||||
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
|
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
|
||||||
if should_return:
|
if should_return:
|
||||||
@ -462,6 +554,12 @@ class JSInterpreter(object):
|
|||||||
expr = self._dump(inner, local_vars) + outer
|
expr = self._dump(inner, local_vars) + outer
|
||||||
|
|
||||||
if expr.startswith('('):
|
if expr.startswith('('):
|
||||||
|
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
|
||||||
|
if m:
|
||||||
|
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
|
||||||
|
outer = None
|
||||||
|
inner, should_abort = self._offset_e_by_d(m.group('d'), m.group('e'), local_vars)
|
||||||
|
else:
|
||||||
inner, outer = self._separate_at_paren(expr)
|
inner, outer = self._separate_at_paren(expr)
|
||||||
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
|
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
|
||||||
if not outer or should_abort:
|
if not outer or should_abort:
|
||||||
@ -532,8 +630,7 @@ class JSInterpreter(object):
|
|||||||
if m.group('err'):
|
if m.group('err'):
|
||||||
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
|
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
|
||||||
catch_vars = local_vars.new_child(m=catch_vars)
|
catch_vars = local_vars.new_child(m=catch_vars)
|
||||||
err = None
|
err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
||||||
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
|
|
||||||
|
|
||||||
m = self._FINALLY_RE.match(expr)
|
m = self._FINALLY_RE.match(expr)
|
||||||
if m:
|
if m:
|
||||||
@ -637,7 +734,7 @@ class JSInterpreter(object):
|
|||||||
(?P<op>{_OPERATOR_RE})?
|
(?P<op>{_OPERATOR_RE})?
|
||||||
=(?!=)(?P<expr>.*)$
|
=(?!=)(?P<expr>.*)$
|
||||||
)|(?P<return>
|
)|(?P<return>
|
||||||
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
|
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||||
)|(?P<indexing>
|
)|(?P<indexing>
|
||||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||||
)|(?P<attribute>
|
)|(?P<attribute>
|
||||||
@ -671,11 +768,12 @@ class JSInterpreter(object):
|
|||||||
raise JS_Break()
|
raise JS_Break()
|
||||||
elif expr == 'continue':
|
elif expr == 'continue':
|
||||||
raise JS_Continue()
|
raise JS_Continue()
|
||||||
|
|
||||||
elif expr == 'undefined':
|
elif expr == 'undefined':
|
||||||
return JS_Undefined, should_return
|
return JS_Undefined, should_return
|
||||||
elif expr == 'NaN':
|
elif expr == 'NaN':
|
||||||
return float('NaN'), should_return
|
return _NaN, should_return
|
||||||
|
elif expr == 'Infinity':
|
||||||
|
return _Infinity, should_return
|
||||||
|
|
||||||
elif md.get('return'):
|
elif md.get('return'):
|
||||||
return local_vars[m.group('name')], should_return
|
return local_vars[m.group('name')], should_return
|
||||||
@ -702,9 +800,37 @@ class JSInterpreter(object):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
right_expr = separated.pop()
|
right_expr = separated.pop()
|
||||||
while op == '-' and len(separated) > 1 and not separated[-1].strip():
|
# handle operators that are both unary and binary, minimal BODMAS
|
||||||
right_expr = '-' + right_expr
|
if op in ('+', '-'):
|
||||||
|
# simplify/adjust consecutive instances of these operators
|
||||||
|
undone = 0
|
||||||
|
separated = [s.strip() for s in separated]
|
||||||
|
while len(separated) > 1 and not separated[-1]:
|
||||||
|
undone += 1
|
||||||
separated.pop()
|
separated.pop()
|
||||||
|
if op == '-' and undone % 2 != 0:
|
||||||
|
right_expr = op + right_expr
|
||||||
|
elif op == '+':
|
||||||
|
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
if separated[-1][-1:] in self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
# hanging op at end of left => unary + (strip) or - (push right)
|
||||||
|
left_val = separated[-1] if separated else ''
|
||||||
|
for dm_op in ('*', '%', '/', '**'):
|
||||||
|
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
||||||
|
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||||
|
expr = op.join(separated) + op + right_expr
|
||||||
|
if len(separated) > 1:
|
||||||
|
separated.pop()
|
||||||
|
right_expr = op.join((left_val, right_expr))
|
||||||
|
else:
|
||||||
|
separated = [op.join((left_val, right_expr))]
|
||||||
|
right_expr = None
|
||||||
|
break
|
||||||
|
if right_expr is None:
|
||||||
|
continue
|
||||||
|
|
||||||
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
||||||
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
|
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return
|
||||||
|
|
||||||
@ -724,12 +850,15 @@ class JSInterpreter(object):
|
|||||||
memb = member
|
memb = member
|
||||||
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
def eval_method():
|
def eval_method(variable, member):
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
|
if Debugger.ENABLED:
|
||||||
|
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
|
||||||
return
|
return
|
||||||
types = {
|
types = {
|
||||||
'String': compat_str,
|
'String': compat_str,
|
||||||
'Math': float,
|
'Math': float,
|
||||||
|
'Array': list,
|
||||||
}
|
}
|
||||||
obj = local_vars.get(variable)
|
obj = local_vars.get(variable)
|
||||||
if obj in (JS_Undefined, None):
|
if obj in (JS_Undefined, None):
|
||||||
@ -755,12 +884,29 @@ class JSInterpreter(object):
|
|||||||
self.interpret_expression(v, local_vars, allow_recursion)
|
self.interpret_expression(v, local_vars, allow_recursion)
|
||||||
for v in self._separate(arg_str)]
|
for v in self._separate(arg_str)]
|
||||||
|
|
||||||
if obj == compat_str:
|
# Fixup prototype call
|
||||||
|
if isinstance(obj, type):
|
||||||
|
new_member, rest = member.partition('.')[0::2]
|
||||||
|
if new_member == 'prototype':
|
||||||
|
new_member, func_prototype = rest.partition('.')[0::2]
|
||||||
|
assertion(argvals, 'takes one or more arguments')
|
||||||
|
assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj))
|
||||||
|
if func_prototype == 'call':
|
||||||
|
obj = argvals.pop(0)
|
||||||
|
elif func_prototype == 'apply':
|
||||||
|
assertion(len(argvals) == 2, 'takes two arguments')
|
||||||
|
obj, argvals = argvals
|
||||||
|
assertion(isinstance(argvals, list), 'second argument must be a list')
|
||||||
|
else:
|
||||||
|
raise self.Exception('Unsupported Function method ' + func_prototype, expr)
|
||||||
|
member = new_member
|
||||||
|
|
||||||
|
if obj is compat_str:
|
||||||
if member == 'fromCharCode':
|
if member == 'fromCharCode':
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(argvals, 'takes one or more arguments')
|
||||||
return ''.join(map(compat_chr, argvals))
|
return ''.join(map(compat_chr, argvals))
|
||||||
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
raise self.Exception('Unsupported string method ' + member, expr=expr)
|
||||||
elif obj == float:
|
elif obj is float:
|
||||||
if member == 'pow':
|
if member == 'pow':
|
||||||
assertion(len(argvals) == 2, 'takes two arguments')
|
assertion(len(argvals) == 2, 'takes two arguments')
|
||||||
return argvals[0] ** argvals[1]
|
return argvals[0] ** argvals[1]
|
||||||
@ -779,18 +925,25 @@ class JSInterpreter(object):
|
|||||||
obj.reverse()
|
obj.reverse()
|
||||||
return obj
|
return obj
|
||||||
elif member == 'slice':
|
elif member == 'slice':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
|
||||||
assertion(len(argvals) == 1, 'takes exactly one argument')
|
# From [1]:
|
||||||
return obj[argvals[0]:]
|
# .slice() - like [:]
|
||||||
|
# .slice(n) - like [n:] (not [slice(n)]
|
||||||
|
# .slice(m, n) - like [m:n] or [slice(m, n)]
|
||||||
|
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
|
||||||
|
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
|
||||||
|
if len(argvals) < 2:
|
||||||
|
argvals += (None,)
|
||||||
|
return obj[slice(*argvals)]
|
||||||
elif member == 'splice':
|
elif member == 'splice':
|
||||||
assertion(isinstance(obj, list), 'must be applied on a list')
|
assertion(isinstance(obj, list), 'must be applied on a list')
|
||||||
assertion(argvals, 'takes one or more arguments')
|
assertion(argvals, 'takes one or more arguments')
|
||||||
index, howMany = map(int, (argvals + [len(obj)])[:2])
|
index, how_many = map(int, (argvals + [len(obj)])[:2])
|
||||||
if index < 0:
|
if index < 0:
|
||||||
index += len(obj)
|
index += len(obj)
|
||||||
add_items = argvals[2:]
|
add_items = argvals[2:]
|
||||||
res = []
|
res = []
|
||||||
for i in range(index, min(index + howMany, len(obj))):
|
for _ in range(index, min(index + how_many, len(obj))):
|
||||||
res.append(obj.pop(index))
|
res.append(obj.pop(index))
|
||||||
for i, item in enumerate(add_items):
|
for i, item in enumerate(add_items):
|
||||||
obj.insert(index + i, item)
|
obj.insert(index + i, item)
|
||||||
@ -848,11 +1001,11 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
if remaining:
|
if remaining:
|
||||||
ret, should_abort = self.interpret_statement(
|
ret, should_abort = self.interpret_statement(
|
||||||
self._named_object(local_vars, eval_method()) + remaining,
|
self._named_object(local_vars, eval_method(variable, member)) + remaining,
|
||||||
local_vars, allow_recursion)
|
local_vars, allow_recursion)
|
||||||
return ret, should_return or should_abort
|
return ret, should_return or should_abort
|
||||||
else:
|
else:
|
||||||
return eval_method(), should_return
|
return eval_method(variable, member), should_return
|
||||||
|
|
||||||
elif md.get('function'):
|
elif md.get('function'):
|
||||||
fname = m.group('fname')
|
fname = m.group('fname')
|
||||||
@ -880,28 +1033,39 @@ class JSInterpreter(object):
|
|||||||
def extract_object(self, objname):
|
def extract_object(self, objname):
|
||||||
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
|
||||||
obj = {}
|
obj = {}
|
||||||
obj_m = re.search(
|
fields = next(filter(None, (
|
||||||
r'''(?x)
|
obj_m.group('fields') for obj_m in re.finditer(
|
||||||
(?<!this\.)%s\s*=\s*{\s*
|
r'''(?xs)
|
||||||
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
|
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
|
||||||
}\s*;
|
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
|
||||||
''' % (re.escape(objname), _FUNC_NAME_RE),
|
}}\s*;
|
||||||
self.code)
|
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
|
||||||
if not obj_m:
|
self.code))), None)
|
||||||
|
if not fields:
|
||||||
raise self.Exception('Could not find object ' + objname)
|
raise self.Exception('Could not find object ' + objname)
|
||||||
fields = obj_m.group('fields')
|
|
||||||
# Currently, it only supports function definitions
|
# Currently, it only supports function definitions
|
||||||
fields_m = re.finditer(
|
for f in re.finditer(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
|
||||||
''' % (_FUNC_NAME_RE, _NAME_RE),
|
''' % (_FUNC_NAME_RE, _NAME_RE),
|
||||||
fields)
|
fields):
|
||||||
for f in fields_m:
|
|
||||||
argnames = self.build_arglist(f.group('args'))
|
argnames = self.build_arglist(f.group('args'))
|
||||||
obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
|
name = remove_quotes(f.group('key'))
|
||||||
|
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
|
||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _offset_e_by_d(d, e, local_vars):
|
||||||
|
""" Short-cut eval: (d%e.length+e.length)%e.length """
|
||||||
|
try:
|
||||||
|
d = local_vars[d]
|
||||||
|
e = local_vars[e]
|
||||||
|
e = len(e)
|
||||||
|
return _js_mod(_js_mod(d, e) + e, e), False
|
||||||
|
except Exception:
|
||||||
|
return None, True
|
||||||
|
|
||||||
def extract_function_code(self, funcname):
|
def extract_function_code(self, funcname):
|
||||||
""" @returns argnames, code """
|
""" @returns argnames, code """
|
||||||
func_m = re.search(
|
func_m = re.search(
|
||||||
@ -914,13 +1078,15 @@ class JSInterpreter(object):
|
|||||||
\((?P<args>[^)]*)\)\s*
|
\((?P<args>[^)]*)\)\s*
|
||||||
(?P<code>{.+})''' % {'name': re.escape(funcname)},
|
(?P<code>{.+})''' % {'name': re.escape(funcname)},
|
||||||
self.code)
|
self.code)
|
||||||
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
|
||||||
if func_m is None:
|
if func_m is None:
|
||||||
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
|
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
|
||||||
|
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
|
||||||
return self.build_arglist(func_m.group('args')), code
|
return self.build_arglist(func_m.group('args')), code
|
||||||
|
|
||||||
def extract_function(self, funcname):
|
def extract_function(self, funcname):
|
||||||
return self.extract_function_from_code(*self.extract_function_code(funcname))
|
return function_with_repr(
|
||||||
|
self.extract_function_from_code(*self.extract_function_code(funcname)),
|
||||||
|
'F<%s>' % (funcname,))
|
||||||
|
|
||||||
def extract_function_from_code(self, argnames, code, *global_stack):
|
def extract_function_from_code(self, argnames, code, *global_stack):
|
||||||
local_vars = {}
|
local_vars = {}
|
||||||
@ -929,7 +1095,7 @@ class JSInterpreter(object):
|
|||||||
if mobj is None:
|
if mobj is None:
|
||||||
break
|
break
|
||||||
start, body_start = mobj.span()
|
start, body_start = mobj.span()
|
||||||
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
|
body, remaining = self._separate_at_paren(code[body_start - 1:])
|
||||||
name = self._named_object(local_vars, self.extract_function_from_code(
|
name = self._named_object(local_vars, self.extract_function_from_code(
|
||||||
[x.strip() for x in mobj.group('args').split(',')],
|
[x.strip() for x in mobj.group('args').split(',')],
|
||||||
body, local_vars, *global_stack))
|
body, local_vars, *global_stack))
|
||||||
@ -957,8 +1123,7 @@ class JSInterpreter(object):
|
|||||||
argnames = tuple(argnames)
|
argnames = tuple(argnames)
|
||||||
|
|
||||||
def resf(args, kwargs={}, allow_recursion=100):
|
def resf(args, kwargs={}, allow_recursion=100):
|
||||||
global_stack[0].update(
|
global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
|
||||||
zip_longest(argnames, args, fillvalue=None))
|
|
||||||
global_stack[0].update(kwargs)
|
global_stack[0].update(kwargs)
|
||||||
var_stack = LocalNameSpace(*global_stack)
|
var_stack = LocalNameSpace(*global_stack)
|
||||||
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
|
||||||
|
@ -11,6 +11,7 @@ from .compat import (
|
|||||||
compat_get_terminal_size,
|
compat_get_terminal_size,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
|
compat_open as open,
|
||||||
compat_shlex_split,
|
compat_shlex_split,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
@ -41,14 +42,11 @@ def _hide_login_info(opts):
|
|||||||
def parseOpts(overrideArguments=None):
|
def parseOpts(overrideArguments=None):
|
||||||
def _readOptions(filename_bytes, default=[]):
|
def _readOptions(filename_bytes, default=[]):
|
||||||
try:
|
try:
|
||||||
optionf = open(filename_bytes)
|
optionf = open(filename_bytes, encoding=preferredencoding())
|
||||||
except IOError:
|
except IOError:
|
||||||
return default # silently skip if file is not present
|
return default # silently skip if file is not present
|
||||||
try:
|
try:
|
||||||
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
|
|
||||||
contents = optionf.read()
|
contents = optionf.read()
|
||||||
if sys.version_info < (3,):
|
|
||||||
contents = contents.decode(preferredencoding())
|
|
||||||
res = compat_shlex_split(contents, comments=True)
|
res = compat_shlex_split(contents, comments=True)
|
||||||
finally:
|
finally:
|
||||||
optionf.close()
|
optionf.close()
|
||||||
@ -535,6 +533,10 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--no-check-certificate',
|
'--no-check-certificate',
|
||||||
action='store_true', dest='no_check_certificate', default=False,
|
action='store_true', dest='no_check_certificate', default=False,
|
||||||
help='Suppress HTTPS certificate validation')
|
help='Suppress HTTPS certificate validation')
|
||||||
|
workarounds.add_option(
|
||||||
|
'--no-check-extensions',
|
||||||
|
action='store_true', dest='no_check_extensions', default=False,
|
||||||
|
help='Suppress file extension validation')
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--prefer-insecure',
|
'--prefer-insecure',
|
||||||
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
'--prefer-unsecure', action='store_true', dest='prefer_insecure',
|
||||||
@ -546,12 +548,14 @@ def parseOpts(overrideArguments=None):
|
|||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--referer',
|
'--referer',
|
||||||
metavar='URL', dest='referer', default=None,
|
metavar='URL', dest='referer', default=None,
|
||||||
help='Specify a custom referer, use if the video access is restricted to one domain',
|
help='Specify a custom Referer: use if the video access is restricted to one domain',
|
||||||
)
|
)
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--add-header',
|
'--add-header',
|
||||||
metavar='FIELD:VALUE', dest='headers', action='append',
|
metavar='FIELD:VALUE', dest='headers', action='append',
|
||||||
help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
|
help=('Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times. '
|
||||||
|
'NB Use --cookies rather than adding a Cookie header if its contents may be sensitive; '
|
||||||
|
'data from a Cookie header will be sent to all domains, not just the one intended')
|
||||||
)
|
)
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--bidi-workaround',
|
'--bidi-workaround',
|
||||||
@ -733,9 +737,13 @@ def parseOpts(overrideArguments=None):
|
|||||||
'--no-part',
|
'--no-part',
|
||||||
action='store_true', dest='nopart', default=False,
|
action='store_true', dest='nopart', default=False,
|
||||||
help='Do not use .part files - write directly into output file')
|
help='Do not use .part files - write directly into output file')
|
||||||
|
filesystem.add_option(
|
||||||
|
'--mtime',
|
||||||
|
action='store_true', dest='updatetime', default=True,
|
||||||
|
help='Use the Last-modified header to set the file modification time (default)')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--no-mtime',
|
'--no-mtime',
|
||||||
action='store_false', dest='updatetime', default=True,
|
action='store_false', dest='updatetime',
|
||||||
help='Do not use the Last-modified header to set the file modification time')
|
help='Do not use the Last-modified header to set the file modification time')
|
||||||
filesystem.add_option(
|
filesystem.add_option(
|
||||||
'--write-description',
|
'--write-description',
|
||||||
|
@ -18,6 +18,8 @@ from ..utils import (
|
|||||||
shell_quote,
|
shell_quote,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from ..compat import compat_open as open
|
||||||
|
|
||||||
|
|
||||||
class EmbedThumbnailPPError(PostProcessingError):
|
class EmbedThumbnailPPError(PostProcessingError):
|
||||||
pass
|
pass
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
@ -9,6 +8,7 @@ import re
|
|||||||
|
|
||||||
from .common import AudioConversionError, PostProcessor
|
from .common import AudioConversionError, PostProcessor
|
||||||
|
|
||||||
|
from ..compat import compat_open as open
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encodeArgument,
|
encodeArgument,
|
||||||
encodeFilename,
|
encodeFilename,
|
||||||
@ -74,8 +74,11 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
return FFmpegPostProcessor(downloader)._versions
|
return FFmpegPostProcessor(downloader)._versions
|
||||||
|
|
||||||
def _determine_executables(self):
|
def _determine_executables(self):
|
||||||
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
|
# ordered to match prefer_ffmpeg!
|
||||||
|
convs = ['ffmpeg', 'avconv']
|
||||||
|
probes = ['ffprobe', 'avprobe']
|
||||||
prefer_ffmpeg = True
|
prefer_ffmpeg = True
|
||||||
|
programs = convs + probes
|
||||||
|
|
||||||
def get_ffmpeg_version(path):
|
def get_ffmpeg_version(path):
|
||||||
ver = get_exe_version(path, args=['-version'])
|
ver = get_exe_version(path, args=['-version'])
|
||||||
@ -96,6 +99,7 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
|
|
||||||
self._paths = None
|
self._paths = None
|
||||||
self._versions = None
|
self._versions = None
|
||||||
|
location = None
|
||||||
if self._downloader:
|
if self._downloader:
|
||||||
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
|
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
|
||||||
location = self._downloader.params.get('ffmpeg_location')
|
location = self._downloader.params.get('ffmpeg_location')
|
||||||
@ -118,33 +122,21 @@ class FFmpegPostProcessor(PostProcessor):
|
|||||||
location = os.path.dirname(os.path.abspath(location))
|
location = os.path.dirname(os.path.abspath(location))
|
||||||
if basename in ('ffmpeg', 'ffprobe'):
|
if basename in ('ffmpeg', 'ffprobe'):
|
||||||
prefer_ffmpeg = True
|
prefer_ffmpeg = True
|
||||||
|
|
||||||
self._paths = dict(
|
self._paths = dict(
|
||||||
(p, os.path.join(location, p)) for p in programs)
|
(p, p if location is None else os.path.join(location, p))
|
||||||
|
for p in programs)
|
||||||
self._versions = dict(
|
self._versions = dict(
|
||||||
|
x for x in (
|
||||||
(p, get_ffmpeg_version(self._paths[p])) for p in programs)
|
(p, get_ffmpeg_version(self._paths[p])) for p in programs)
|
||||||
if self._versions is None:
|
if x[1] is not None)
|
||||||
self._versions = dict(
|
|
||||||
(p, get_ffmpeg_version(p)) for p in programs)
|
|
||||||
self._paths = dict((p, p) for p in programs)
|
|
||||||
|
|
||||||
if prefer_ffmpeg is False:
|
basenames = [None, None]
|
||||||
prefs = ('avconv', 'ffmpeg')
|
for i, progs in enumerate((convs, probes)):
|
||||||
else:
|
for p in progs[::-1 if prefer_ffmpeg is False else 1]:
|
||||||
prefs = ('ffmpeg', 'avconv')
|
if self._versions.get(p):
|
||||||
for p in prefs:
|
basenames[i] = p
|
||||||
if self._versions[p]:
|
|
||||||
self.basename = p
|
|
||||||
break
|
|
||||||
|
|
||||||
if prefer_ffmpeg is False:
|
|
||||||
prefs = ('avprobe', 'ffprobe')
|
|
||||||
else:
|
|
||||||
prefs = ('ffprobe', 'avprobe')
|
|
||||||
for p in prefs:
|
|
||||||
if self._versions[p]:
|
|
||||||
self.probe_basename = p
|
|
||||||
break
|
break
|
||||||
|
self.basename, self.probe_basename = basenames
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def available(self):
|
def available(self):
|
||||||
@ -493,7 +485,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
|||||||
chapters = info.get('chapters', [])
|
chapters = info.get('chapters', [])
|
||||||
if chapters:
|
if chapters:
|
||||||
metadata_filename = replace_extension(filename, 'meta')
|
metadata_filename = replace_extension(filename, 'meta')
|
||||||
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
|
with open(metadata_filename, 'w', encoding='utf-8') as f:
|
||||||
def ffmpeg_escape(text):
|
def ffmpeg_escape(text):
|
||||||
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
||||||
|
|
||||||
@ -636,7 +628,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
with open(dfxp_file, 'rb') as f:
|
with open(dfxp_file, 'rb') as f:
|
||||||
srt_data = dfxp2srt(f.read())
|
srt_data = dfxp2srt(f.read())
|
||||||
|
|
||||||
with io.open(srt_file, 'wt', encoding='utf-8') as f:
|
with open(srt_file, 'w', encoding='utf-8') as f:
|
||||||
f.write(srt_data)
|
f.write(srt_data)
|
||||||
old_file = srt_file
|
old_file = srt_file
|
||||||
|
|
||||||
@ -652,7 +644,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
|
|||||||
|
|
||||||
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
self.run_ffmpeg(old_file, new_file, ['-f', new_format])
|
||||||
|
|
||||||
with io.open(new_file, 'rt', encoding='utf-8') as f:
|
with open(new_file, 'r', encoding='utf-8') as f:
|
||||||
subs[lang] = {
|
subs[lang] = {
|
||||||
'ext': new_ext,
|
'ext': new_ext,
|
||||||
'data': f.read(),
|
'data': f.read(),
|
||||||
|
@ -727,7 +727,7 @@ class SWFInterpreter(object):
|
|||||||
stack.append(res)
|
stack.append(res)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
assert isinstance(obj, (dict, _ScopeDict)),\
|
assert isinstance(obj, (dict, _ScopeDict)), \
|
||||||
'Accessing member %r on %r' % (pname, obj)
|
'Accessing member %r on %r' % (pname, obj)
|
||||||
res = obj.get(pname, undefined)
|
res = obj.get(pname, undefined)
|
||||||
stack.append(res)
|
stack.append(res)
|
||||||
|
10
youtube_dl/traversal.py
Normal file
10
youtube_dl/traversal.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# TODO: move these utils.fns here and move import to utils
|
||||||
|
# flake8: noqa
|
||||||
|
from .utils import (
|
||||||
|
dict_get,
|
||||||
|
get_first,
|
||||||
|
T,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
@ -1,6 +1,5 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import io
|
|
||||||
import json
|
import json
|
||||||
import traceback
|
import traceback
|
||||||
import hashlib
|
import hashlib
|
||||||
@ -9,7 +8,10 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
from zipimport import zipimporter
|
from zipimport import zipimporter
|
||||||
|
|
||||||
from .compat import compat_realpath
|
from .compat import (
|
||||||
|
compat_open as open,
|
||||||
|
compat_realpath,
|
||||||
|
)
|
||||||
from .utils import encode_compat_str
|
from .utils import encode_compat_str
|
||||||
|
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
@ -127,7 +129,7 @@ def update_self(to_screen, verbose, opener):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
bat = os.path.join(directory, 'youtube-dl-updater.bat')
|
bat = os.path.join(directory, 'youtube-dl-updater.bat')
|
||||||
with io.open(bat, 'w') as batfile:
|
with open(bat, 'w') as batfile:
|
||||||
batfile.write('''
|
batfile.write('''
|
||||||
@echo off
|
@echo off
|
||||||
echo Waiting for file handle to be closed ...
|
echo Waiting for file handle to be closed ...
|
||||||
|
1196
youtube_dl/utils.py
1196
youtube_dl/utils.py
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user