Merge branch 'ytdl-org:master' into master

This commit is contained in:
bibiak 2024-08-17 08:41:10 +02:00 committed by GitHub
commit 8e6e47d6c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
43 changed files with 5593 additions and 2382 deletions

View File

@ -6,9 +6,13 @@ env:
pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7 pypy-versions: pypy-2.7, pypy-3.6, pypy-3.7
cpython-versions: main cpython-versions: main
test-set: core test-set: core
# Python beta version to be built using pyenv before setup-python support
# Must also be included in all-cpython-versions
next: 3.13
on: on:
push: push:
# push inputs aren't known to GitHub
inputs: inputs:
cpython-versions: cpython-versions:
type: string type: string
@ -17,6 +21,7 @@ on:
type: string type: string
default: core default: core
pull_request: pull_request:
# pull_request inputs aren't known to GitHub
inputs: inputs:
cpython-versions: cpython-versions:
type: string type: string
@ -56,6 +61,23 @@ jobs:
test-set: ${{ steps.run.outputs.test-set }} test-set: ${{ steps.run.outputs.test-set }}
own-pip-versions: ${{ steps.run.outputs.own-pip-versions }} own-pip-versions: ${{ steps.run.outputs.own-pip-versions }}
steps: steps:
# push and pull_request inputs aren't known to GitHub (pt3)
- name: Set push defaults
if: ${{ github.event_name == 'push' }}
env:
cpython-versions: all
test-set: core
run: |
echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
- name: Get pull_request inputs
if: ${{ github.event_name == 'pull_request' }}
env:
cpython-versions: main
test-set: both
run: |
echo "cpython-versions=${{env.cpython-versions}}" >> "$GITHUB_ENV"
echo "test_set=${{env.test_set}}" >> "$GITHUB_ENV"
- name: Make version array - name: Make version array
id: run id: run
run: | run: |
@ -79,6 +101,7 @@ jobs:
# versions with a special get-pip.py in a per-version subdirectory # versions with a special get-pip.py in a per-version subdirectory
printf 'own-pip-versions=%s\n' \ printf 'own-pip-versions=%s\n' \
"$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT" "$(json_list 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6)" >> "$GITHUB_OUTPUT"
tests: tests:
name: Run tests name: Run tests
needs: select needs: select
@ -121,14 +144,24 @@ jobs:
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }} ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
run-tests-ext: sh run-tests-ext: sh
steps: steps:
- name: Prepare Linux
if: ${{ startswith(matrix.os, 'ubuntu') }}
shell: bash
run: |
# apt in runner, if needed, may not be up-to-date
sudo apt-get update
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v3
#-------- Python 3 ----- #-------- Python 3 -----
- name: Set up supported Python ${{ matrix.python-version }} - name: Set up supported Python ${{ matrix.python-version }}
id: setup-python id: setup-python
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != '3.12'}} if: ${{ matrix.python-impl == 'cpython' && matrix.python-version != '2.6' && matrix.python-version != '2.7' && matrix.python-version != env.next }}
# wrap broken actions/setup-python@v4 # wrap broken actions/setup-python@v4
# NB may run apt-get install in Linux
uses: ytdl-org/setup-python@v1 uses: ytdl-org/setup-python@v1
env:
# Temporary workaround for Python 3.5 failures - May 2024
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
cache-build: true cache-build: true
@ -164,23 +197,23 @@ jobs:
'import sys' \ 'import sys' \
'print(sys.path)' \ 'print(sys.path)' \
| ${expected} - | ${expected} -
#-------- Python 3.12 - #-------- Python next (was 3.12) -
- name: Set up CPython 3.12 environment - name: Set up CPython 3.next environment
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
shell: bash shell: bash
run: | run: |
PYENV_ROOT=$HOME/.local/share/pyenv PYENV_ROOT=$HOME/.local/share/pyenv
echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV" echo "PYENV_ROOT=${PYENV_ROOT}" >> "$GITHUB_ENV"
- name: Cache Python 3.12 - name: Cache Python 3.next
id: cache312 id: cachenext
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
uses: actions/cache@v3 uses: actions/cache@v3
with: with:
key: python-3.12 key: python-${{ env.next }}
path: | path: |
${{ env.PYENV_ROOT }} ${{ env.PYENV_ROOT }}
- name: Build and set up Python 3.12 - name: Build and set up Python 3.next
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' && ! steps.cache312.outputs.cache-hit }} if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next && ! steps.cachenext.outputs.cache-hit }}
# dl and build locally # dl and build locally
shell: bash shell: bash
run: | run: |
@ -192,12 +225,13 @@ jobs:
export PYENV_ROOT=${{ env.PYENV_ROOT }} export PYENV_ROOT=${{ env.PYENV_ROOT }}
export PATH=$PYENV_ROOT/bin:$PATH export PATH=$PYENV_ROOT/bin:$PATH
git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT" git clone "https://github.com/pyenv/pyenv.git" "$PYENV_ROOT"
pyenv install 3.12.0b4 pyenv install ${{ env.next }}
- name: Locate Python 3.12 - name: Locate Python 3.next
if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == '3.12' }} if: ${{ matrix.python-impl == 'cpython' && matrix.python-version == env.next }}
shell: bash shell: bash
run: | run: |
PYTHONHOME="${{ env.PYENV_ROOT }}/versions/3.12.0b4" PYTHONHOME="$(echo "${{ env.PYENV_ROOT }}/versions/${{ env.next }}."*)"
test -n "$PYTHONHOME"
echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV" echo "PYTHONHOME=$PYTHONHOME" >> "$GITHUB_ENV"
echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV" echo "PATH=${PYTHONHOME}/bin:$PATH" >> "$GITHUB_ENV"
#-------- Python 2.7 -- #-------- Python 2.7 --
@ -368,7 +402,7 @@ jobs:
done done
#-------- nose -------- #-------- nose --------
- name: Install nose for Python ${{ matrix.python-version }} - name: Install nose for Python ${{ matrix.python-version }}
if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == '3.12')) }} if: ${{ (matrix.python-version != '3.2' && steps.setup-python.outputs.python-path) || (matrix.python-impl == 'cpython' && (matrix.python-version == '2.7' || matrix.python-version == env.next)) }}
shell: bash shell: bash
run: | run: |
echo "$PATH" echo "$PATH"

View File

@ -5,9 +5,9 @@ import hashlib
import json import json
import os.path import os.path
import re import re
import types
import ssl import ssl
import sys import sys
import types
import unittest import unittest
import youtube_dl.extractor import youtube_dl.extractor
@ -181,18 +181,18 @@ def expect_value(self, got, expected, field):
op, _, expected_num = expected.partition(':') op, _, expected_num = expected.partition(':')
expected_num = int(expected_num) expected_num = int(expected_num)
if op == 'mincount': if op == 'mincount':
assert_func = assertGreaterEqual assert_func = self.assertGreaterEqual
msg_tmpl = 'Expected %d items in field %s, but only got %d' msg_tmpl = 'Expected %d items in field %s, but only got %d'
elif op == 'maxcount': elif op == 'maxcount':
assert_func = assertLessEqual assert_func = self.assertLessEqual
msg_tmpl = 'Expected maximum %d items in field %s, but got %d' msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
elif op == 'count': elif op == 'count':
assert_func = assertEqual assert_func = self.assertEqual
msg_tmpl = 'Expected exactly %d items in field %s, but got %d' msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
else: else:
assert False assert False
assert_func( assert_func(
self, len(got), expected_num, len(got), expected_num,
msg_tmpl % (expected_num, field, len(got))) msg_tmpl % (expected_num, field, len(got)))
return return
self.assertEqual( self.assertEqual(
@ -262,27 +262,6 @@ def assertRegexpMatches(self, text, regexp, msg=None):
self.assertTrue(m, msg) self.assertTrue(m, msg)
def assertGreaterEqual(self, got, expected, msg=None):
if not (got >= expected):
if msg is None:
msg = '%r not greater than or equal to %r' % (got, expected)
self.assertTrue(got >= expected, msg)
def assertLessEqual(self, got, expected, msg=None):
if not (got <= expected):
if msg is None:
msg = '%r not less than or equal to %r' % (got, expected)
self.assertTrue(got <= expected, msg)
def assertEqual(self, got, expected, msg=None):
if not (got == expected):
if msg is None:
msg = '%r not equal to %r' % (got, expected)
self.assertTrue(got == expected, msg)
def expect_warnings(ydl, warnings_re): def expect_warnings(ydl, warnings_re):
real_warning = ydl.report_warning real_warning = ydl.report_warning

View File

@ -153,6 +153,9 @@ class TestInfoExtractor(unittest.TestCase):
''' '''
search = self.ie._search_nextjs_data(html, 'testID') search = self.ie._search_nextjs_data(html, 'testID')
self.assertEqual(search['props']['pageProps']['video']['id'], 'testid') self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
search = self.ie._search_nextjs_data(
'no next.js data here, move along', 'testID', default={'status': 0})
self.assertEqual(search['status'], 0)
def test_search_nuxt_data(self): def test_search_nuxt_data(self):
html = ''' html = '''
@ -993,7 +996,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 5997.485, 'tbr': 5997.485,
'width': 1920, 'width': 1920,
'height': 1080, 'height': 1080,
}] }],
{},
), ( ), (
# https://github.com/ytdl-org/youtube-dl/pull/14844 # https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only', 'urls_only',
@ -1076,7 +1080,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'tbr': 4400, 'tbr': 4400,
'width': 1920, 'width': 1920,
'height': 1080, 'height': 1080,
}] }],
{},
), ( ), (
# https://github.com/ytdl-org/youtube-dl/issues/20346 # https://github.com/ytdl-org/youtube-dl/issues/20346
# Media considered unfragmented even though it contains # Media considered unfragmented even though it contains
@ -1122,18 +1127,185 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 360, 'width': 360,
'height': 360, 'height': 360,
'fps': 30, 'fps': 30,
}] }],
{},
), (
# https://github.com/ytdl-org/youtube-dl/issues/30235
# Bento4 generated test mpd
# mp4dash --mpd-name=manifest.mpd --no-split --use-segment-list mediafiles
'url_and_range',
'http://unknown/manifest.mpd', # mpd_url
'http://unknown/', # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/',
'ext': 'm4a',
'format_id': 'audio-und-mp4a.40.2',
'format_note': 'DASH audio',
'container': 'm4a_dash',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 98.808,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/',
'ext': 'mp4',
'format_id': 'video-avc1',
'format_note': 'DASH video',
'container': 'mp4_dash',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4D401E',
'tbr': 699.597,
'width': 768,
'height': 432
}],
{},
), (
# https://github.com/ytdl-org/youtube-dl/issues/27575
# GPAC generated test mpd
# MP4Box -dash 10000 -single-file -out manifest.mpd mediafiles
'range_only',
'http://unknown/manifest.mpd', # mpd_url
'http://unknown/', # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/audio_dashinit.mp4',
'ext': 'm4a',
'format_id': '2',
'format_note': 'DASH audio',
'container': 'm4a_dash',
'protocol': 'http_dash_segments',
'acodec': 'mp4a.40.2',
'vcodec': 'none',
'tbr': 98.096,
}, {
'manifest_url': 'http://unknown/manifest.mpd',
'fragment_base_url': 'http://unknown/video_dashinit.mp4',
'ext': 'mp4',
'format_id': '1',
'format_note': 'DASH video',
'container': 'mp4_dash',
'protocol': 'http_dash_segments',
'acodec': 'none',
'vcodec': 'avc1.4D401E',
'tbr': 526.987,
'width': 768,
'height': 432
}],
{},
), (
'subtitles',
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/',
[{
'format_id': 'audio=128001',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'm4a',
'tbr': 128.001,
'asr': 48000,
'format_note': 'DASH audio',
'container': 'm4a_dash',
'vcodec': 'none',
'acodec': 'mp4a.40.2',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=100000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 336,
'height': 144,
'tbr': 100,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=326000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 562,
'height': 240,
'tbr': 326,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=698000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 844,
'height': 360,
'tbr': 698,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=1493000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 1126,
'height': 480,
'tbr': 1493,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}, {
'format_id': 'video=4482000',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'ext': 'mp4',
'width': 1688,
'height': 720,
'tbr': 4482,
'format_note': 'DASH video',
'container': 'mp4_dash',
'vcodec': 'avc1.4D401F',
'acodec': 'none',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}],
{
'en': [
{
'ext': 'mp4',
'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd',
'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/',
'protocol': 'http_dash_segments',
}
]
},
) )
] ]
for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
with open('./test/testdata/mpd/%s.mpd' % mpd_file, with open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f: mode='r', encoding='utf-8') as f:
formats = self.ie._parse_mpd_formats( formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
compat_etree_fromstring(f.read().encode('utf-8')), compat_etree_fromstring(f.read().encode('utf-8')),
mpd_base_url=mpd_base_url, mpd_url=mpd_url) mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats) self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None) expect_value(self, formats, expected_formats, None)
expect_value(self, subtitles, expected_subtitles, None)
def test_parse_f4m_formats(self): def test_parse_f4m_formats(self):
_TEST_CASES = [ _TEST_CASES = [

View File

@ -23,6 +23,7 @@ from youtube_dl.compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_unquote_plus, compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
compat_urllib_request,
) )
@ -135,6 +136,19 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_casefold('\u03a3'), '\u03c3') self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3') self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
def test_compat_urllib_request_Request(self):
self.assertEqual(
compat_urllib_request.Request('http://127.0.0.1', method='PUT').get_method(),
'PUT')
class PUTrequest(compat_urllib_request.Request):
def get_method(self):
return 'PUT'
self.assertEqual(
PUTrequest('http://127.0.0.1').get_method(),
'PUT')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -9,7 +9,6 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import ( from test.helper import (
assertGreaterEqual,
expect_warnings, expect_warnings,
get_params, get_params,
gettestcases, gettestcases,
@ -35,12 +34,20 @@ from youtube_dl.utils import (
ExtractorError, ExtractorError,
error_to_compat_str, error_to_compat_str,
format_bytes, format_bytes,
IDENTITY,
preferredencoding,
UnavailableVideoError, UnavailableVideoError,
) )
from youtube_dl.extractor import get_info_extractor from youtube_dl.extractor import get_info_extractor
RETRIES = 3 RETRIES = 3
# Some unittest APIs require actual str
if not isinstance('TEST', str):
_encode_str = lambda s: s.encode(preferredencoding())
else:
_encode_str = IDENTITY
class YoutubeDL(youtube_dl.YoutubeDL): class YoutubeDL(youtube_dl.YoutubeDL):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -101,7 +108,7 @@ def generator(test_case, tname):
def print_skipping(reason): def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason)) print('Skipping %s: %s' % (test_case['name'], reason))
self.skipTest(reason) self.skipTest(_encode_str(reason))
if not ie.working(): if not ie.working():
print_skipping('IE marked as not _WORKING') print_skipping('IE marked as not _WORKING')
@ -122,7 +129,10 @@ def generator(test_case, tname):
params['outtmpl'] = tname + '_' + params['outtmpl'] params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case: if is_playlist and 'playlist' not in test_case:
params.setdefault('extract_flat', 'in_playlist') params.setdefault('extract_flat', 'in_playlist')
params.setdefault('playlistend', test_case.get('playlist_mincount')) params.setdefault('playlistend',
test_case['playlist_maxcount'] + 1
if test_case.get('playlist_maxcount')
else test_case.get('playlist_mincount'))
params.setdefault('skip_download', True) params.setdefault('skip_download', True)
ydl = YoutubeDL(params, auto_init=False) ydl = YoutubeDL(params, auto_init=False)
@ -183,13 +193,19 @@ def generator(test_case, tname):
expect_info_dict(self, res_dict, test_case.get('info_dict', {})) expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
if 'playlist_mincount' in test_case: if 'playlist_mincount' in test_case:
assertGreaterEqual( self.assertGreaterEqual(
self,
len(res_dict['entries']), len(res_dict['entries']),
test_case['playlist_mincount'], test_case['playlist_mincount'],
'Expected at least %d in playlist %s, but got only %d' % ( 'Expected at least %d in playlist %s, but got only %d' % (
test_case['playlist_mincount'], test_case['url'], test_case['playlist_mincount'], test_case['url'],
len(res_dict['entries']))) len(res_dict['entries'])))
if 'playlist_maxcount' in test_case:
self.assertLessEqual(
len(res_dict['entries']),
test_case['playlist_maxcount'],
'Expected at most %d in playlist %s, but got %d' % (
test_case['playlist_maxcount'], test_case['url'],
len(res_dict['entries'])))
if 'playlist_count' in test_case: if 'playlist_count' in test_case:
self.assertEqual( self.assertEqual(
len(res_dict['entries']), len(res_dict['entries']),
@ -231,8 +247,8 @@ def generator(test_case, tname):
if params.get('test'): if params.get('test'):
expected_minsize = max(expected_minsize, 10000) expected_minsize = max(expected_minsize, 10000)
got_fsize = os.path.getsize(tc_filename) got_fsize = os.path.getsize(tc_filename)
assertGreaterEqual( self.assertGreaterEqual(
self, got_fsize, expected_minsize, got_fsize, expected_minsize,
'Expected %s to be at least %s, but it\'s only %s ' % 'Expected %s to be at least %s, but it\'s only %s ' %
(tc_filename, format_bytes(expected_minsize), (tc_filename, format_bytes(expected_minsize),
format_bytes(got_fsize))) format_bytes(got_fsize)))

View File

@ -18,6 +18,7 @@ from test.helper import (
) )
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.compat import ( from youtube_dl.compat import (
compat_contextlib_suppress,
compat_http_cookiejar_Cookie, compat_http_cookiejar_Cookie,
compat_http_server, compat_http_server,
compat_kwargs, compat_kwargs,
@ -35,6 +36,9 @@ from youtube_dl.downloader.external import (
HttpieFD, HttpieFD,
WgetFD, WgetFD,
) )
from youtube_dl.postprocessor import (
FFmpegPostProcessor,
)
import threading import threading
TEST_SIZE = 10 * 1024 TEST_SIZE = 10 * 1024
@ -227,7 +231,17 @@ class TestAria2cFD(unittest.TestCase):
self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd) self.assertIn('--load-cookies=%s' % downloader._cookies_tempfile, cmd)
@ifExternalFDAvailable(FFmpegFD) # Handle delegated availability
def ifFFmpegFDAvailable(externalFD):
# raise SkipTest, or set False!
avail = ifExternalFDAvailable(externalFD) and False
with compat_contextlib_suppress(Exception):
avail = FFmpegPostProcessor(downloader=None).available
return unittest.skipUnless(
avail, externalFD.get_basename() + ' not found')
@ifFFmpegFDAvailable(FFmpegFD)
class TestFFmpegFD(unittest.TestCase): class TestFFmpegFD(unittest.TestCase):
_args = [] _args = []

View File

@ -11,194 +11,146 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import math import math
import re import re
from youtube_dl.compat import compat_str
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
NaN = object()
class TestJSInterpreter(unittest.TestCase): class TestJSInterpreter(unittest.TestCase):
def _test(self, jsi_or_code, expected, func='f', args=()):
if isinstance(jsi_or_code, compat_str):
jsi_or_code = JSInterpreter(jsi_or_code)
got = jsi_or_code.call_function(func, *args)
if expected is NaN:
self.assertTrue(math.isnan(got), '{0} is not NaN'.format(got))
else:
self.assertEqual(got, expected)
def test_basic(self): def test_basic(self):
jsi = JSInterpreter('function x(){;}') jsi = JSInterpreter('function f(){;}')
self.assertEqual(jsi.call_function('x'), None) self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
self.assertEqual(repr(jsi.extract_function('x')), 'F<x>') self._test(jsi, None)
jsi = JSInterpreter('function x3(){return 42;}') self._test('function f(){return 42;}', 42)
self.assertEqual(jsi.call_function('x3'), 42) self._test('function f(){42}', None)
self._test('var f = function(){return 42;}', 42)
jsi = JSInterpreter('function x3(){42}')
self.assertEqual(jsi.call_function('x3'), None)
jsi = JSInterpreter('var x5 = function(){return 42;}')
self.assertEqual(jsi.call_function('x5'), 42)
def test_calc(self):
jsi = JSInterpreter('function x4(a){return 2*a+1;}')
self.assertEqual(jsi.call_function('x4', 3), 7)
def test_add(self): def test_add(self):
jsi = JSInterpreter('function f(){return 42 + 7;}') self._test('function f(){return 42 + 7;}', 49)
self.assertEqual(jsi.call_function('f'), 49) self._test('function f(){return 42 + undefined;}', NaN)
jsi = JSInterpreter('function f(){return 42 + undefined;}') self._test('function f(){return 42 + null;}', 42)
self.assertTrue(math.isnan(jsi.call_function('f')))
jsi = JSInterpreter('function f(){return 42 + null;}')
self.assertEqual(jsi.call_function('f'), 42)
def test_sub(self): def test_sub(self):
jsi = JSInterpreter('function f(){return 42 - 7;}') self._test('function f(){return 42 - 7;}', 35)
self.assertEqual(jsi.call_function('f'), 35) self._test('function f(){return 42 - undefined;}', NaN)
jsi = JSInterpreter('function f(){return 42 - undefined;}') self._test('function f(){return 42 - null;}', 42)
self.assertTrue(math.isnan(jsi.call_function('f')))
jsi = JSInterpreter('function f(){return 42 - null;}')
self.assertEqual(jsi.call_function('f'), 42)
def test_mul(self): def test_mul(self):
jsi = JSInterpreter('function f(){return 42 * 7;}') self._test('function f(){return 42 * 7;}', 294)
self.assertEqual(jsi.call_function('f'), 294) self._test('function f(){return 42 * undefined;}', NaN)
jsi = JSInterpreter('function f(){return 42 * undefined;}') self._test('function f(){return 42 * null;}', 0)
self.assertTrue(math.isnan(jsi.call_function('f')))
jsi = JSInterpreter('function f(){return 42 * null;}')
self.assertEqual(jsi.call_function('f'), 0)
def test_div(self): def test_div(self):
jsi = JSInterpreter('function f(a, b){return a / b;}') jsi = JSInterpreter('function f(a, b){return a / b;}')
self.assertTrue(math.isnan(jsi.call_function('f', 0, 0))) self._test(jsi, NaN, args=(0, 0))
self.assertTrue(math.isnan(jsi.call_function('f', JS_Undefined, 1))) self._test(jsi, NaN, args=(JS_Undefined, 1))
self.assertTrue(math.isinf(jsi.call_function('f', 2, 0))) self._test(jsi, float('inf'), args=(2, 0))
self.assertEqual(jsi.call_function('f', 0, 3), 0) self._test(jsi, 0, args=(0, 3))
def test_mod(self): def test_mod(self):
jsi = JSInterpreter('function f(){return 42 % 7;}') self._test('function f(){return 42 % 7;}', 0)
self.assertEqual(jsi.call_function('f'), 0) self._test('function f(){return 42 % 0;}', NaN)
jsi = JSInterpreter('function f(){return 42 % 0;}') self._test('function f(){return 42 % undefined;}', NaN)
self.assertTrue(math.isnan(jsi.call_function('f')))
jsi = JSInterpreter('function f(){return 42 % undefined;}')
self.assertTrue(math.isnan(jsi.call_function('f')))
def test_exp(self): def test_exp(self):
jsi = JSInterpreter('function f(){return 42 ** 2;}') self._test('function f(){return 42 ** 2;}', 1764)
self.assertEqual(jsi.call_function('f'), 1764) self._test('function f(){return 42 ** undefined;}', NaN)
jsi = JSInterpreter('function f(){return 42 ** undefined;}') self._test('function f(){return 42 ** null;}', 1)
self.assertTrue(math.isnan(jsi.call_function('f'))) self._test('function f(){return undefined ** 42;}', NaN)
jsi = JSInterpreter('function f(){return 42 ** null;}')
self.assertEqual(jsi.call_function('f'), 1) def test_calc(self):
jsi = JSInterpreter('function f(){return undefined ** 42;}') self._test('function f(a){return 2*a+1;}', 7, args=[3])
self.assertTrue(math.isnan(jsi.call_function('f')))
def test_empty_return(self): def test_empty_return(self):
jsi = JSInterpreter('function f(){return; y()}') self._test('function f(){return; y()}', None)
self.assertEqual(jsi.call_function('f'), None)
def test_morespace(self): def test_morespace(self):
jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }') self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
self.assertEqual(jsi.call_function('x', 3), 7) self._test('function f () { x = 2 ; return x; }', 2)
jsi = JSInterpreter('function f () { x = 2 ; return x; }')
self.assertEqual(jsi.call_function('f'), 2)
def test_strange_chars(self): def test_strange_chars(self):
jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }') self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
self.assertEqual(jsi.call_function('$_xY1', 20), 21) 21, args=[20], func='$_xY1')
def test_operators(self): def test_operators(self):
jsi = JSInterpreter('function f(){return 1 << 5;}') self._test('function f(){return 1 << 5;}', 32)
self.assertEqual(jsi.call_function('f'), 32) self._test('function f(){return 2 ** 5}', 32)
self._test('function f(){return 19 & 21;}', 17)
jsi = JSInterpreter('function f(){return 2 ** 5}') self._test('function f(){return 11 >> 2;}', 2)
self.assertEqual(jsi.call_function('f'), 32) self._test('function f(){return []? 2+3: 4;}', 5)
self._test('function f(){return 1 == 2}', False)
jsi = JSInterpreter('function f(){return 19 & 21;}') self._test('function f(){return 0 && 1 || 2;}', 2)
self.assertEqual(jsi.call_function('f'), 17) self._test('function f(){return 0 ?? 42;}', 0)
self._test('function f(){return "life, the universe and everything" < 42;}', False)
jsi = JSInterpreter('function f(){return 11 >> 2;}') # https://github.com/ytdl-org/youtube-dl/issues/32815
self.assertEqual(jsi.call_function('f'), 2) self._test('function f(){return 0 - 7 * - 6;}', 42)
jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
self.assertEqual(jsi.call_function('f'), 5)
jsi = JSInterpreter('function f(){return 1 == 2}')
self.assertEqual(jsi.call_function('f'), False)
jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
self.assertEqual(jsi.call_function('f'), 2)
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
self.assertEqual(jsi.call_function('f'), 0)
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
self.assertFalse(jsi.call_function('f'))
def test_array_access(self): def test_array_access(self):
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
def test_parens(self): def test_parens(self):
jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}') self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7)
self.assertEqual(jsi.call_function('f'), 7) self._test('function f(){return (1 + 2) * 3;}', 9)
jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
self.assertEqual(jsi.call_function('f'), 9)
def test_quotes(self): def test_quotes(self):
jsi = JSInterpreter(r'function f(){return "a\"\\("}') self._test(r'function f(){return "a\"\\("}', r'a"\(')
self.assertEqual(jsi.call_function('f'), r'a"\(')
def test_assignments(self): def test_assignments(self):
jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}') self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
self.assertEqual(jsi.call_function('f'), 31) self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}')
self.assertEqual(jsi.call_function('f'), 51)
jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}')
self.assertEqual(jsi.call_function('f'), -11)
@unittest.skip('Not yet fully implemented')
def test_comments(self): def test_comments(self):
'Skipping: Not yet fully implemented' self._test('''
return function f() {
jsi = JSInterpreter('''
function x() {
var x = /* 1 + */ 2; var x = /* 1 + */ 2;
var y = /* 30 var y = /* 30
* 40 */ 50; * 40 */ 50;
return x + y; return x + y;
} }
''') ''', 52)
self.assertEqual(jsi.call_function('x'), 52)
jsi = JSInterpreter(''' self._test('''
function f() { function f() {
var x = "/*"; var x = "/*";
var y = 1 /* comment */ + 2; var y = 1 /* comment */ + 2;
return y; return y;
} }
''') ''', 3)
self.assertEqual(jsi.call_function('f'), 3)
def test_precedence(self): def test_precedence(self):
jsi = JSInterpreter(''' self._test('''
function x() { function f() {
var a = [10, 20, 30, 40, 50]; var a = [10, 20, 30, 40, 50];
var b = 6; var b = 6;
a[0]=a[b%a.length]; a[0]=a[b%a.length];
return a; return a;
}''') }
self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) ''', [20, 20, 30, 40, 50])
def test_builtins(self): def test_builtins(self):
jsi = JSInterpreter(''' self._test('function f() { return NaN }', NaN)
function x() { return NaN }
''')
self.assertTrue(math.isnan(jsi.call_function('x')))
def test_Date(self): def test_Date(self):
jsi = JSInterpreter(''' self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
function x(dt) { return new Date(dt) - 0; }
''')
self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
# date format m/d/y # date format m/d/y
self.assertEqual(jsi.call_function('x', '12/31/1969 18:01:26 MDT'), 86000) self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
# epoch 0 # epoch 0
self.assertEqual(jsi.call_function('x', '1 January 1970 00:00:00 UTC'), 0) self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
def test_call(self): def test_call(self):
jsi = JSInterpreter(''' jsi = JSInterpreter('''
@ -206,179 +158,115 @@ class TestJSInterpreter(unittest.TestCase):
function y(a) { return x() + (a?a:0); } function y(a) { return x() + (a?a:0); }
function z() { return y(3); } function z() { return y(3); }
''') ''')
self.assertEqual(jsi.call_function('z'), 5) self._test(jsi, 5, func='z')
self.assertEqual(jsi.call_function('y'), 2) self._test(jsi, 2, func='y')
def test_if(self): def test_if(self):
jsi = JSInterpreter(''' self._test('''
function x() { function f() {
let a = 9; let a = 9;
if (0==0) {a++} if (0==0) {a++}
return a return a
}''') }
self.assertEqual(jsi.call_function('x'), 10) ''', 10)
jsi = JSInterpreter(''' self._test('''
function x() { function f() {
if (0==0) {return 10} if (0==0) {return 10}
}''') }
self.assertEqual(jsi.call_function('x'), 10) ''', 10)
jsi = JSInterpreter(''' self._test('''
function x() { function f() {
if (0!=0) {return 1} if (0!=0) {return 1}
else {return 10} else {return 10}
}''') }
self.assertEqual(jsi.call_function('x'), 10) ''', 10)
""" # Unsupported
jsi = JSInterpreter('''
function x() {
if (0!=0) return 1;
else {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
"""
def test_elseif(self): def test_elseif(self):
jsi = JSInterpreter(''' self._test('''
function x() { function f() {
if (0!=0) {return 1} if (0!=0) {return 1}
else if (1==0) {return 2} else if (1==0) {return 2}
else {return 10} else {return 10}
}''') }
self.assertEqual(jsi.call_function('x'), 10) ''', 10)
""" # Unsupported
jsi = JSInterpreter('''
function x() {
if (0!=0) return 1;
else if (1==0) {return 2}
else {return 10}
}''')
self.assertEqual(jsi.call_function('x'), 10)
# etc
"""
def test_for_loop(self): def test_for_loop(self):
# function x() { a=0; for (i=0; i-10; i++) {a++} a } self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10)
jsi = JSInterpreter('''
function x() { a=0; for (i=0; i-10; i++) {a++} return a }
''')
self.assertEqual(jsi.call_function('x'), 10)
def test_while_loop(self): def test_while_loop(self):
# function x() { a=0; while (a<10) {a++} a } self._test('function f() { a=0; while (a<10) {a++} return a }', 10)
jsi = JSInterpreter('''
function x() { a=0; while (a<10) {a++} return a }
''')
self.assertEqual(jsi.call_function('x'), 10)
def test_switch(self): def test_switch(self):
jsi = JSInterpreter(''' jsi = JSInterpreter('''
function x(f) { switch(f){ function f(x) { switch(x){
case 1:f+=1; case 1:x+=1;
case 2:f+=2; case 2:x+=2;
case 3:f+=3;break; case 3:x+=3;break;
case 4:f+=4; case 4:x+=4;
default:f=0; default:x=0;
} return f } } return x }
''') ''')
self.assertEqual(jsi.call_function('x', 1), 7) self._test(jsi, 7, args=[1])
self.assertEqual(jsi.call_function('x', 3), 6) self._test(jsi, 6, args=[3])
self.assertEqual(jsi.call_function('x', 5), 0) self._test(jsi, 0, args=[5])
def test_switch_default(self): def test_switch_default(self):
jsi = JSInterpreter(''' jsi = JSInterpreter('''
function x(f) { switch(f){ function f(x) { switch(x){
case 2: f+=2; case 2: x+=2;
default: f-=1; default: x-=1;
case 5: case 5:
case 6: f+=6; case 6: x+=6;
case 0: break; case 0: break;
case 1: f+=1; case 1: x+=1;
} return f } } return x }
''') ''')
self.assertEqual(jsi.call_function('x', 1), 2) self._test(jsi, 2, args=[1])
self.assertEqual(jsi.call_function('x', 5), 11) self._test(jsi, 11, args=[5])
self.assertEqual(jsi.call_function('x', 9), 14) self._test(jsi, 14, args=[9])
def test_try(self): def test_try(self):
jsi = JSInterpreter(''' self._test('function f() { try{return 10} catch(e){return 5} }', 10)
function x() { try{return 10} catch(e){return 5} }
''')
self.assertEqual(jsi.call_function('x'), 10)
def test_catch(self): def test_catch(self):
jsi = JSInterpreter(''' self._test('function f() { try{throw 10} catch(e){return 5} }', 5)
function x() { try{throw 10} catch(e){return 5} }
''')
self.assertEqual(jsi.call_function('x'), 5)
def test_finally(self): def test_finally(self):
jsi = JSInterpreter(''' self._test('function f() { try{throw 10} finally {return 42} }', 42)
function x() { try{throw 10} finally {return 42} } self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42)
''')
self.assertEqual(jsi.call_function('x'), 42)
jsi = JSInterpreter('''
function x() { try{throw 10} catch(e){return 5} finally {return 42} }
''')
self.assertEqual(jsi.call_function('x'), 42)
def test_nested_try(self): def test_nested_try(self):
jsi = JSInterpreter(''' self._test('''
function x() {try { function f() {try {
try{throw 10} finally {throw 42} try{throw 10} finally {throw 42}
} catch(e){return 5} } } catch(e){return 5} }
''') ''', 5)
self.assertEqual(jsi.call_function('x'), 5)
def test_for_loop_continue(self): def test_for_loop_continue(self):
jsi = JSInterpreter(''' self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0)
function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
''')
self.assertEqual(jsi.call_function('x'), 0)
def test_for_loop_break(self): def test_for_loop_break(self):
jsi = JSInterpreter(''' self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0)
function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
''')
self.assertEqual(jsi.call_function('x'), 0)
def test_for_loop_try(self): def test_for_loop_try(self):
jsi = JSInterpreter(''' self._test('''
function x() { function f() {
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
return 42 } return 42 }
''') ''', 42)
self.assertEqual(jsi.call_function('x'), 42)
def test_literal_list(self): def test_literal_list(self):
jsi = JSInterpreter(''' self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7])
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
''')
self.assertEqual(jsi.call_function('x'), [5, 6, 7])
def test_comma(self): def test_comma(self):
jsi = JSInterpreter(''' self._test('function f() { a=5; a -= 1, a+=3; return a }', 7)
function x() { a=5; a -= 1, a+=3; return a } self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
''') self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
self.assertEqual(jsi.call_function('x'), 7)
jsi = JSInterpreter('''
function x() { a=5; return (a -= 1, a+=3, a); }
''')
self.assertEqual(jsi.call_function('x'), 7)
jsi = JSInterpreter('''
function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
''')
self.assertEqual(jsi.call_function('x'), 5)
def test_void(self): def test_void(self):
jsi = JSInterpreter(''' self._test('function f() { return void 42; }', None)
function x() { return void 42; }
''')
self.assertEqual(jsi.call_function('x'), None)
def test_return_function(self): def test_return_function(self):
jsi = JSInterpreter(''' jsi = JSInterpreter('''
@ -387,61 +275,42 @@ class TestJSInterpreter(unittest.TestCase):
self.assertEqual(jsi.call_function('x')([]), 1) self.assertEqual(jsi.call_function('x')([]), 1)
def test_null(self): def test_null(self):
jsi = JSInterpreter(''' self._test('function f() { return null; }', None)
function x() { return null; } self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }',
''') [False, False, False, False])
self.assertIs(jsi.call_function('x'), None) self._test('function f() { return [null >= 0, null <= 0]; }', [True, True])
jsi = JSInterpreter('''
function x() { return [null > 0, null < 0, null == 0, null === 0]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, False, False])
jsi = JSInterpreter('''
function x() { return [null >= 0, null <= 0]; }
''')
self.assertEqual(jsi.call_function('x'), [True, True])
def test_undefined(self): def test_undefined(self):
jsi = JSInterpreter(''' self._test('function f() { return undefined === undefined; }', True)
function x() { return undefined === undefined; } self._test('function f() { return undefined; }', JS_Undefined)
''') self._test('function f() {return undefined ?? 42; }', 42)
self.assertTrue(jsi.call_function('x')) self._test('function f() { let v; return v; }', JS_Undefined)
self._test('function f() { let v; return v**0; }', 1)
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
[False, False, JS_Undefined, JS_Undefined])
jsi = JSInterpreter(''' self._test('''
function x() { return undefined; } function f() { return [
''') undefined === undefined,
self.assertIs(jsi.call_function('x'), JS_Undefined) undefined == undefined,
undefined == null
jsi = JSInterpreter(''' ]; }
function x() { let v; return v; } ''', [True] * 3)
''') self._test('''
self.assertIs(jsi.call_function('x'), JS_Undefined) function f() { return [
undefined < undefined,
jsi = JSInterpreter(''' undefined > undefined,
function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; } undefined === 0,
''') undefined == 0,
self.assertEqual(jsi.call_function('x'), [True, True, False, False]) undefined < 0,
undefined > 0,
jsi = JSInterpreter(''' undefined >= 0,
function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; } undefined <= 0,
''') undefined > null,
self.assertEqual(jsi.call_function('x'), [False, False, False, False]) undefined < null,
undefined === null
jsi = JSInterpreter(''' ]; }
function x() { return [undefined >= 0, undefined <= 0]; } ''', [False] * 11)
''')
self.assertEqual(jsi.call_function('x'), [False, False])
jsi = JSInterpreter('''
function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, True, False])
jsi = JSInterpreter('''
function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
''')
self.assertEqual(jsi.call_function('x'), [False, True, False, False])
jsi = JSInterpreter(''' jsi = JSInterpreter('''
function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
@ -449,45 +318,14 @@ class TestJSInterpreter(unittest.TestCase):
for y in jsi.call_function('x'): for y in jsi.call_function('x'):
self.assertTrue(math.isnan(y)) self.assertTrue(math.isnan(y))
jsi = JSInterpreter('''
function x() { let v; return v**0; }
''')
self.assertEqual(jsi.call_function('x'), 1)
jsi = JSInterpreter('''
function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
''')
self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
jsi = JSInterpreter('function x(){return undefined ?? 42; }')
self.assertEqual(jsi.call_function('x'), 42)
def test_object(self): def test_object(self):
jsi = JSInterpreter(''' self._test('function f() { return {}; }', {})
function x() { return {}; } self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0])
''') self._test('function f() { let a; return a?.qq; }', JS_Undefined)
self.assertEqual(jsi.call_function('x'), {}) self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
jsi = JSInterpreter('''
function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
''')
self.assertEqual(jsi.call_function('x'), [42, 0])
jsi = JSInterpreter('''
function x() { let a; return a?.qq; }
''')
self.assertIs(jsi.call_function('x'), JS_Undefined)
jsi = JSInterpreter('''
function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
''')
self.assertIs(jsi.call_function('x'), JS_Undefined)
def test_regex(self): def test_regex(self):
jsi = JSInterpreter(''' self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
function x() { let a=/,,[/,913,/](,)}/; }
''')
self.assertIs(jsi.call_function('x'), None)
jsi = JSInterpreter(''' jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; } function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
@ -504,88 +342,116 @@ class TestJSInterpreter(unittest.TestCase):
''') ''')
self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
jsi = JSInterpreter(r''' jsi = JSInterpreter(r'function f() { let a=/,][}",],()}(\[)/; return a; }')
function x() { let a="data-name".replace("data-", ""); return a } self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)')
''')
self.assertEqual(jsi.call_function('x'), 'name')
jsi = JSInterpreter(r''' jsi = JSInterpreter(r'function f() { let a=[/[)\\]/]; return a[0]; }')
function x() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; } self.assertEqual(jsi.call_function('f').pattern, r'[)\\]')
''')
self.assertEqual(jsi.call_function('x'), 'name')
jsi = JSInterpreter(r''' def test_replace(self):
function x() { let a="data-name".replace(/^.+-/, ""); return a; } self._test('function f() { let a="data-name".replace("data-", ""); return a }',
''') 'name')
self.assertEqual(jsi.call_function('x'), 'name') self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }',
'name')
jsi = JSInterpreter(r''' self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }',
function x() { let a="data-name".replace(/a/g, "o"); return a; } 'name')
''') self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }',
self.assertEqual(jsi.call_function('x'), 'doto-nome') 'doto-nome')
self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }',
jsi = JSInterpreter(r''' 'doto-nome')
function x() { let a="data-name".replaceAll("a", "o"); return a; }
''')
self.assertEqual(jsi.call_function('x'), 'doto-nome')
jsi = JSInterpreter(r'''
function x() { let a=[/[)\\]/]; return a[0]; }
''')
self.assertEqual(jsi.call_function('x').pattern, r'[)\\]')
""" # fails
jsi = JSInterpreter(r'''
function x() { let a=100; a/=/[0-9]+/.exec('divide by 20 today')[0]; }
''')
self.assertEqual(jsi.call_function('x'), 5)
"""
def test_char_code_at(self): def test_char_code_at(self):
jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
self.assertEqual(jsi.call_function('x', 0), 116) self._test(jsi, 116, args=[0])
self.assertEqual(jsi.call_function('x', 1), 101) self._test(jsi, 101, args=[1])
self.assertEqual(jsi.call_function('x', 2), 115) self._test(jsi, 115, args=[2])
self.assertEqual(jsi.call_function('x', 3), 116) self._test(jsi, 116, args=[3])
self.assertEqual(jsi.call_function('x', 4), None) self._test(jsi, None, args=[4])
self.assertEqual(jsi.call_function('x', 'not_a_number'), 116) self._test(jsi, 116, args=['not_a_number'])
def test_bitwise_operators_overflow(self): def test_bitwise_operators_overflow(self):
jsi = JSInterpreter('function x(){return -524999584 << 5}') self._test('function f(){return -524999584 << 5}', 379882496)
self.assertEqual(jsi.call_function('x'), 379882496) self._test('function f(){return 1236566549 << 5}', 915423904)
jsi = JSInterpreter('function x(){return 1236566549 << 5}') def test_bitwise_operators_typecast(self):
self.assertEqual(jsi.call_function('x'), 915423904) # madness
self._test('function f(){return null << 5}', 0)
self._test('function f(){return undefined >> 5}', 0)
self._test('function f(){return 42 << NaN}', 42)
self._test('function f(){return 42 << Infinity}', 42)
def test_bitwise_operators_madness(self): def test_negative(self):
jsi = JSInterpreter('function x(){return null << 5}') self._test('function f(){return 2 * -2.0 ;}', -4)
self.assertEqual(jsi.call_function('x'), 0) self._test('function f(){return 2 - - -2 ;}', 0)
self._test('function f(){return 2 - - - -2 ;}', 4)
jsi = JSInterpreter('function x(){return undefined >> 5}') self._test('function f(){return 2 - + + - -2;}', 0)
self.assertEqual(jsi.call_function('x'), 0) self._test('function f(){return 2 + - + - -2;}', 0)
jsi = JSInterpreter('function x(){return 42 << NaN}')
self.assertEqual(jsi.call_function('x'), 42)
jsi = JSInterpreter('function x(){return 42 << Infinity}')
self.assertEqual(jsi.call_function('x'), 42)
def test_32066(self): def test_32066(self):
jsi = JSInterpreter("function x(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}") self._test(
self.assertEqual(jsi.call_function('x'), 70) "function f(){return Math.pow(3, 5) + new Date('1970-01-01T08:01:42.000+08:00') / 1000 * -239 - -24205;}",
70)
def test_unary_operators(self): @unittest.skip('Not yet working')
jsi = JSInterpreter('function f(){return 2 - - - 2;}')
self.assertEqual(jsi.call_function('f'), 0)
# fails
# jsi = JSInterpreter('function f(){return 2 + - + - - 2;}')
# self.assertEqual(jsi.call_function('f'), 0)
""" # fails so far
def test_packed(self): def test_packed(self):
jsi = JSInterpreter('''function x(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') self._test(
self.assertEqual(jsi.call_function('x', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))) '''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''',
""" '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))
def test_join(self):
test_input = list('test')
tests = [
'function f(a, b){return a.join(b)}',
'function f(a, b){return Array.prototype.join.call(a, b)}',
'function f(a, b){return Array.prototype.join.apply(a, [b])}',
]
for test in tests:
jsi = JSInterpreter(test)
self._test(jsi, 'test', args=[test_input, ''])
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
self._test(jsi, '', args=[[], '-'])
def test_split(self):
test_result = list('test')
tests = [
'function f(a, b){return a.split(b)}',
'function f(a, b){return String.prototype.split.call(a, b)}',
'function f(a, b){return String.prototype.split.apply(a, [b])}',
]
for test in tests:
jsi = JSInterpreter(test)
self._test(jsi, test_result, args=['test', ''])
self._test(jsi, test_result, args=['t-e-s-t', '-'])
self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', ''])
def test_slice(self):
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
self._test('function f(){return "012345678".slice()}', '012345678')
self._test('function f(){return "012345678".slice(0)}', '012345678')
self._test('function f(){return "012345678".slice(5)}', '5678')
self._test('function f(){return "012345678".slice(99)}', '')
self._test('function f(){return "012345678".slice(-2)}', '78')
self._test('function f(){return "012345678".slice(-99)}', '012345678')
self._test('function f(){return "012345678".slice(0, 0)}', '')
self._test('function f(){return "012345678".slice(1, 0)}', '')
self._test('function f(){return "012345678".slice(0, 1)}', '0')
self._test('function f(){return "012345678".slice(3, 6)}', '345')
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -295,6 +295,7 @@ class TestNRKSubtitles(BaseTestSubtitles):
def test_allsubtitles(self): def test_allsubtitles(self):
self.DL.params['writesubtitles'] = True self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True self.DL.params['allsubtitles'] = True
self.DL.params['format'] = 'best/bestvideo'
subtitles = self.getSubtitles() subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), set(['nb-ttv'])) self.assertEqual(set(subtitles.keys()), set(['nb-ttv']))
self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149') self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')

509
test/test_traversal.py Normal file
View File

@ -0,0 +1,509 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import re
from youtube_dl.traversal import (
dict_get,
get_first,
T,
traverse_obj,
)
from youtube_dl.compat import (
compat_etree_fromstring,
compat_http_cookies,
compat_str,
)
from youtube_dl.utils import (
int_or_none,
str_or_none,
)
_TEST_DATA = {
100: 100,
1.2: 1.2,
'str': 'str',
'None': None,
'...': Ellipsis,
'urls': [
{'index': 0, 'url': 'https://www.example.com/0'},
{'index': 1, 'url': 'https://www.example.com/1'},
],
'data': (
{'index': 2},
{'index': 3},
),
'dict': {},
}
if sys.version_info < (3, 0):
class _TestCase(unittest.TestCase):
def assertCountEqual(self, *args, **kwargs):
return self.assertItemsEqual(*args, **kwargs)
else:
_TestCase = unittest.TestCase
class TestTraversal(_TestCase):
def assertMaybeCountEqual(self, *args, **kwargs):
if sys.version_info < (3, 7):
# random dict order
return self.assertCountEqual(*args, **kwargs)
else:
return self.assertEqual(*args, **kwargs)
def test_traverse_obj(self):
# instant compat
str = compat_str
# define a pukka Iterable
def iter_range(stop):
for from_ in range(stop):
yield from_
# Test base functionality
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
msg='allow tuple path')
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
msg='allow list path')
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
msg='allow iterable path')
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
msg='single items should be treated as a path')
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
# Test Ellipsis behavior
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
(item for item in _TEST_DATA.values() if item not in (None, {})),
msg='`...` should give all non-discarded values')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
msg='`...` selection for dicts should select all values')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='nested `...` queries should work')
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
msg='`...` query result should be flattened')
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
msg='`...` should accept iterables')
# Test function as key
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
[_TEST_DATA['urls']],
msg='function as query key should perform a filter based on (key, value)')
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
msg='exceptions in the query function should be caught')
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
msg='function key should accept iterables')
if __debug__:
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
traverse_obj(_TEST_DATA, lambda a: Ellipsis)
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
# Test set as key (transformation/type, like `expected_type`)
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
msg='Function in set should be a transformation')
self.assertEqual(traverse_obj(_TEST_DATA, ('fail', T(lambda _: 'const'))), 'const',
msg='Function in set should always be called')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
msg='Type in set should be a type filter')
self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str, int))), [100, 'str'],
msg='Multiple types in set should be a type filter')
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
msg='A single set should be wrapped into a path')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
msg='Transformation function should not raise')
self.assertMaybeCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
msg='Function in set should be a transformation')
if __debug__:
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
traverse_obj(_TEST_DATA, set())
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
traverse_obj(_TEST_DATA, set((str.upper, str)))
# Test `slice` as a key
_SLICE_DATA = [0, 1, 2, 3, 4]
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
msg='slice on a dictionary should not throw')
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
msg='slice key should apply slice to sequence')
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
msg='slice key should apply slice to sequence')
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
msg='slice key should apply slice to sequence')
# Test alternative paths
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
msg='multiple `paths` should be treated as alternative paths')
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
msg='alternatives should exit early')
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
msg='alternatives should return `default` if exhausted')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
msg='alternatives should track their own branching return')
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
msg='alternatives on empty objects should search further')
# Test branch and path nesting
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
msg='tuple as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
msg='list as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
msg='double nesting in path should be treated as paths')
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
msg='do not fail early on branching')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='triple nesting in path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='ellipsis as branch path start gets flattened')
# Test dictionary as key
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
msg='dict key should result in a dict with the same keys')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
{0: 'https://www.example.com/0'},
msg='dict key should allow paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
{0: ['https://www.example.com/0']},
msg='tuple in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
{0: ['https://www.example.com/0']},
msg='double nesting in dict path should be treated as paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
msg='triple nesting in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
msg='remove `None` values when top level dict key fails')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
msg='use `default` if key fails and `default`')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
msg='remove empty values when dict key')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
msg='use `default` when dict key and a default')
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
msg='remove empty values when nested dict key fails')
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
msg='default to dict if pruned')
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
msg='default to dict if pruned and default is given')
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
msg='use nested `default` when nested dict key fails and `default`')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
msg='remove key if branch in dict key not successful')
# Testing default parameter behavior
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
msg='default value should be `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
msg='chained fails should result in default')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
msg='should not short cirquit on `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
msg='invalid dict key should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
msg='`None` is a deliberate sentinel and should become `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
msg='`IndexError` should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
msg='if branched but not successful return `default` if defined, not `[]`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
msg='if branched but not successful return `default` even if `default` is `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
msg='if branched but not successful return `[]`, not `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
msg='if branched but object is empty return `[]`, not `default`')
self.assertEqual(traverse_obj(None, Ellipsis), [],
msg='if branched but object is `None` return `[]`, not `default`')
self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
msg='if branched but state is `None` return `[]`, not `default`')
branching_paths = [
('fail', Ellipsis),
(Ellipsis, 'fail'),
100 * ('fail',) + (Ellipsis,),
(Ellipsis,) + 100 * ('fail',),
]
for branching_path in branching_paths:
self.assertEqual(traverse_obj({}, branching_path), [],
msg='if branched but state is `None`, return `[]` (not `default`)')
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
msg='if branching in last alternative and previous did match, return single value')
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
msg='if branching in first alternative and non-branching path does match, return single value')
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
msg='if branching in first alternative and non-branching path does not match, return `default`')
# Testing expected_type behavior
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
'str', msg='accept matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
None, msg='reject non-matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
'0', msg='transform type using type function')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
None, msg='wrap expected_type function in try_call')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
['str'], msg='eliminate items that expected_type fails on')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
{0: 100}, msg='type as expected_type should filter dict values')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
1, msg='expected_type should not filter non-final dict values')
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
{0: {0: 100}}, msg='expected_type should transform deep dict values')
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
[{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
[4], msg='expected_type regression for type matching in tuple branching')
self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
[], msg='expected_type regression for type matching in dict result')
# Test get_all behavior
_GET_ALL_DATA = {'key': [0, 1, 2]}
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
msg='if not `get_all`, return only first matching value')
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
msg='do not overflatten if not `get_all`')
# Test casesense behavior
_CASESENSE_DATA = {
'KeY': 'value0',
0: {
'KeY': 'value1',
0: {'KeY': 'value2'},
},
# FULLWIDTH LATIN CAPITAL LETTER K
'\uff2bey': 'value3',
}
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
msg='dict keys should be case sensitive unless `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
casesense=False), 'value0',
msg='allow non matching key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
casesense=False), 'value3',
msg='allow non matching Unicode key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
casesense=False), ['value1'],
msg='allow non matching key case in branch if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
casesense=False), ['value2'],
msg='allow non matching key case in branch path if `casesense`')
# Test traverse_string behavior
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
msg='do not traverse into string if not `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
_traverse_string=True), 's',
msg='traverse into string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
_traverse_string=True), '.',
msg='traverse into converted data if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
_traverse_string=True), 'str',
msg='`...` should result in string (same value) if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
_traverse_string=True), 'sr',
msg='`slice` should result in string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
_traverse_string=True), 'str',
msg='function should result in string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
_traverse_string=True), ['s', 'r'],
msg='branching should result in list if `traverse_string`')
self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
# Test re.Match as input obj
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
msg='`...` on a `re.Match` should give its `groups()`')
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
msg='function on a `re.Match` should give groupno, value starting at 0')
self.assertEqual(traverse_obj(mobj, 'group'), '3',
msg='str key on a `re.Match` should give group with that name')
self.assertEqual(traverse_obj(mobj, 2), '3',
msg='int key on a `re.Match` should give group with that name')
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
msg='str key on a `re.Match` should respect casesense')
self.assertEqual(traverse_obj(mobj, 'fail'), None,
msg='failing str key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
msg='failing str key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, 8), None,
msg='failing int key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
msg='function on a `re.Match` should give group name as well')
# Test xml.etree.ElementTree.Element as input obj
etree = compat_etree_fromstring('''<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>''')
self.assertEqual(traverse_obj(etree, ''), etree,
msg='empty str key should return the element itself')
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
msg='str key should return all children with that tag name')
self.assertEqual(traverse_obj(etree, Ellipsis), list(etree),
msg='`...` as key should return all children')
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
msg='function as key should get element as value')
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
msg='function as key should get index as key')
self.assertEqual(traverse_obj(etree, 0), etree[0],
msg='int key should return the nth child')
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
msg='`@<attribute>` at end of path should give that attribute')
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
msg='`@<nonexistent>` at end of path should give `None`')
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
msg='`@` should give the full attribute dict')
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
msg='`text()` at end of path should give the inner text')
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
msg='full python xpath features should be supported')
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
msg='special transformations should act on current element')
self.assertEqual(traverse_obj(etree, ('country', 0, Ellipsis, 'text()', T(int_or_none))), [1, 2008, 141100],
msg='special transformations should act on current element')
def test_traversal_unbranching(self):
self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), all]), [100, 1.2],
msg='`all` should give all results as list')
self.assertEqual(traverse_obj(_TEST_DATA, [(100, 1.2), any]), 100,
msg='`any` should give the first result')
self.assertEqual(traverse_obj(_TEST_DATA, [100, all]), [100],
msg='`all` should give list if non branching')
self.assertEqual(traverse_obj(_TEST_DATA, [100, any]), 100,
msg='`any` should give single item if non branching')
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]), [100],
msg='`all` should filter `None` and empty dict')
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]), 100,
msg='`any` should filter `None` and empty dict')
self.assertEqual(traverse_obj(_TEST_DATA, [{
'all': [('dict', 'None', 100, 1.2), all],
'any': [('dict', 'None', 100, 1.2), any],
}]), {'all': [100, 1.2], 'any': 100},
msg='`all`/`any` should apply to each dict path separately')
self.assertEqual(traverse_obj(_TEST_DATA, [{
'all': [('dict', 'None', 100, 1.2), all],
'any': [('dict', 'None', 100, 1.2), any],
}], get_all=False), {'all': [100, 1.2], 'any': 100},
msg='`all`/`any` should apply to dict regardless of `get_all`')
self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, T(float)]), None,
msg='`all` should reset branching status')
self.assertIs(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, T(float)]), None,
msg='`any` should reset branching status')
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, Ellipsis, T(float)]), [1.2],
msg='`all` should allow further branching')
self.assertEqual(traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, Ellipsis, 'index']), [0, 1],
msg='`any` should allow further branching')
def test_traversal_morsel(self):
values = {
'expires': 'a',
'path': 'b',
'comment': 'c',
'domain': 'd',
'max-age': 'e',
'secure': 'f',
'httponly': 'g',
'version': 'h',
'samesite': 'i',
}
# SameSite added in Py3.8, breaks .update for 3.5-3.7
if sys.version_info < (3, 8):
del values['samesite']
morsel = compat_http_cookies.Morsel()
morsel.set(str('item_key'), 'item_value', 'coded_value')
morsel.update(values)
values['key'] = str('item_key')
values['value'] = 'item_value'
values = dict((str(k), v) for k, v in values.items())
# make test pass even without ordered dict
value_set = set(values.values())
for key, value in values.items():
self.assertEqual(traverse_obj(morsel, key), value,
msg='Morsel should provide access to all values')
self.assertEqual(set(traverse_obj(morsel, Ellipsis)), value_set,
msg='`...` should yield all values')
self.assertEqual(set(traverse_obj(morsel, lambda k, v: True)), value_set,
msg='function key should yield all values')
self.assertIs(traverse_obj(morsel, [(None,), any]), morsel,
msg='Morsel should not be implicitly changed to dict on usage')
def test_get_first(self):
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
def test_dict_get(self):
FALSE_VALUES = {
'none': None,
'false': False,
'zero': 0,
'empty_string': '',
'empty_list': [],
}
d = FALSE_VALUES.copy()
d['a'] = 42
self.assertEqual(dict_get(d, 'a'), 42)
self.assertEqual(dict_get(d, 'b'), None)
self.assertEqual(dict_get(d, 'b', 42), 42)
self.assertEqual(dict_get(d, ('a', )), 42)
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
self.assertEqual(dict_get(d, ('b', 'c', )), None)
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
for key, false_value in FALSE_VALUES.items():
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
if __name__ == '__main__':
unittest.main()

View File

@ -14,10 +14,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import io import io
import itertools import itertools
import json import json
import re import types
import xml.etree.ElementTree import xml.etree.ElementTree
from youtube_dl.utils import ( from youtube_dl.utils import (
_UnsafeExtensionError,
age_restricted, age_restricted,
args_to_str, args_to_str,
base_url, base_url,
@ -28,7 +29,6 @@ from youtube_dl.utils import (
DateRange, DateRange,
detect_exe_version, detect_exe_version,
determine_ext, determine_ext,
dict_get,
encode_base_n, encode_base_n,
encode_compat_str, encode_compat_str,
encodeFilename, encodeFilename,
@ -44,7 +44,6 @@ from youtube_dl.utils import (
get_element_by_attribute, get_element_by_attribute,
get_elements_by_class, get_elements_by_class,
get_elements_by_attribute, get_elements_by_attribute,
get_first,
InAdvancePagedList, InAdvancePagedList,
int_or_none, int_or_none,
intlist_to_bytes, intlist_to_bytes,
@ -81,16 +80,14 @@ from youtube_dl.utils import (
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitize_url, sanitize_url,
sanitized_Request,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_or_none,
str_to_int, str_to_int,
strip_jsonp, strip_jsonp,
strip_or_none, strip_or_none,
subtitles_filename, subtitles_filename,
T,
timeconvert, timeconvert,
traverse_obj,
try_call, try_call,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
@ -131,10 +128,6 @@ from youtube_dl.compat import (
class TestUtil(unittest.TestCase): class TestUtil(unittest.TestCase):
# yt-dlp shim
def assertCountEqual(self, expected, got, msg='count should be the same'):
return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
def test_timeconvert(self): def test_timeconvert(self):
self.assertTrue(timeconvert('') is None) self.assertTrue(timeconvert('') is None)
self.assertTrue(timeconvert('bougrg') is None) self.assertTrue(timeconvert('bougrg') is None)
@ -255,6 +248,18 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('foo bar'), 'foo bar') self.assertEqual(sanitize_url('foo bar'), 'foo bar')
def test_sanitized_Request(self):
self.assertFalse(sanitized_Request('http://foo.bar').has_header('Authorization'))
self.assertFalse(sanitized_Request('http://:foo.bar').has_header('Authorization'))
self.assertEqual(sanitized_Request('http://@foo.bar').get_header('Authorization'),
'Basic Og==')
self.assertEqual(sanitized_Request('http://:pass@foo.bar').get_header('Authorization'),
'Basic OnBhc3M=')
self.assertEqual(sanitized_Request('http://user:@foo.bar').get_header('Authorization'),
'Basic dXNlcjo=')
self.assertEqual(sanitized_Request('http://user:pass@foo.bar').get_header('Authorization'),
'Basic dXNlcjpwYXNz')
def test_expand_path(self): def test_expand_path(self):
def env(var): def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
@ -267,6 +272,27 @@ class TestUtil(unittest.TestCase):
expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')), expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME')) '%s/expanded' % compat_getenv('HOME'))
_uncommon_extensions = [
('exe', 'abc.exe.ext'),
('de', 'abc.de.ext'),
('../.mp4', None),
('..\\.mp4', None),
]
def assertUnsafeExtension(self, ext=None):
assert_raises = self.assertRaises(_UnsafeExtensionError)
assert_raises.ext = ext
orig_exit = assert_raises.__exit__
def my_exit(self_, exc_type, exc_val, exc_tb):
did_raise = orig_exit(exc_type, exc_val, exc_tb)
if did_raise and assert_raises.ext is not None:
self.assertEqual(assert_raises.ext, assert_raises.exception.extension, 'Unsafe extension not as unexpected')
return did_raise
assert_raises.__exit__ = types.MethodType(my_exit, assert_raises)
return assert_raises
def test_prepend_extension(self): def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@ -275,6 +301,19 @@ class TestUtil(unittest.TestCase):
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
# Test uncommon extensions
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
for ext, result in self._uncommon_extensions:
with self.assertUnsafeExtension(ext):
prepend_extension('abc', ext)
if result:
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
else:
with self.assertUnsafeExtension(ext):
prepend_extension('abc.ext', ext, 'ext')
with self.assertUnsafeExtension(ext):
prepend_extension('abc.unexpected_ext', ext, 'ext')
def test_replace_extension(self): def test_replace_extension(self):
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
@ -283,6 +322,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
# Test uncommon extensions
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
for ext, _ in self._uncommon_extensions:
with self.assertUnsafeExtension(ext):
replace_extension('abc', ext)
with self.assertUnsafeExtension(ext):
replace_extension('abc.ext', ext, 'ext')
with self.assertUnsafeExtension(ext):
replace_extension('abc.unexpected_ext', ext, 'ext')
def test_subtitles_filename(self): def test_subtitles_filename(self):
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
@ -512,11 +561,14 @@ class TestUtil(unittest.TestCase):
self.assertEqual(float_or_none(set()), None) self.assertEqual(float_or_none(set()), None)
def test_int_or_none(self): def test_int_or_none(self):
self.assertEqual(int_or_none(42), 42)
self.assertEqual(int_or_none('42'), 42) self.assertEqual(int_or_none('42'), 42)
self.assertEqual(int_or_none(''), None) self.assertEqual(int_or_none(''), None)
self.assertEqual(int_or_none(None), None) self.assertEqual(int_or_none(None), None)
self.assertEqual(int_or_none([]), None) self.assertEqual(int_or_none([]), None)
self.assertEqual(int_or_none(set()), None) self.assertEqual(int_or_none(set()), None)
self.assertEqual(int_or_none('42', base=8), 34)
self.assertRaises(TypeError, int_or_none(42, base=8))
def test_str_to_int(self): def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123,456'), 123456)
@ -724,28 +776,6 @@ class TestUtil(unittest.TestCase):
self.assertRaises( self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value') ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
def test_dict_get(self):
FALSE_VALUES = {
'none': None,
'false': False,
'zero': 0,
'empty_string': '',
'empty_list': [],
}
d = FALSE_VALUES.copy()
d['a'] = 42
self.assertEqual(dict_get(d, 'a'), 42)
self.assertEqual(dict_get(d, 'b'), None)
self.assertEqual(dict_get(d, 'b', 42), 42)
self.assertEqual(dict_get(d, ('a', )), 42)
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
self.assertEqual(dict_get(d, ('b', 'c', )), None)
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
for key, false_value in FALSE_VALUES.items():
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
def test_merge_dicts(self): def test_merge_dicts(self):
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
@ -1687,336 +1717,6 @@ Line 1
self.assertEqual(variadic('spam', allowed_types=dict), 'spam') self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
def test_traverse_obj(self):
str = compat_str
_TEST_DATA = {
100: 100,
1.2: 1.2,
'str': 'str',
'None': None,
'...': Ellipsis,
'urls': [
{'index': 0, 'url': 'https://www.example.com/0'},
{'index': 1, 'url': 'https://www.example.com/1'},
],
'data': (
{'index': 2},
{'index': 3},
),
'dict': {},
}
# define a pukka Iterable
def iter_range(stop):
for from_ in range(stop):
yield from_
# Test base functionality
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
msg='allow tuple path')
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
msg='allow list path')
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
msg='allow iterable path')
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
msg='single items should be treated as a path')
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
# Test Ellipsis behavior
self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
(item for item in _TEST_DATA.values() if item not in (None, {})),
msg='`...` should give all non-discarded values')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
msg='`...` selection for dicts should select all values')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='nested `...` queries should work')
self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
msg='`...` query result should be flattened')
self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
msg='`...` should accept iterables')
# Test function as key
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
[_TEST_DATA['urls']],
msg='function as query key should perform a filter based on (key, value)')
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
msg='exceptions in the query function should be caught')
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
msg='function key should accept iterables')
if __debug__:
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
traverse_obj(_TEST_DATA, lambda a: Ellipsis)
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
traverse_obj(_TEST_DATA, lambda a, b, c: Ellipsis)
# Test set as key (transformation/type, like `expected_type`)
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper), )), ['STR'],
msg='Function in set should be a transformation')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str))), ['str'],
msg='Type in set should be a type filter')
self.assertEqual(traverse_obj(_TEST_DATA, T(dict)), _TEST_DATA,
msg='A single set should be wrapped into a path')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str.upper))), ['STR'],
msg='Transformation function should not raise')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, T(str_or_none))),
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
msg='Function in set should be a transformation')
if __debug__:
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
traverse_obj(_TEST_DATA, set())
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
traverse_obj(_TEST_DATA, set((str.upper, str)))
# Test `slice` as a key
_SLICE_DATA = [0, 1, 2, 3, 4]
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
msg='slice on a dictionary should not throw')
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
msg='slice key should apply slice to sequence')
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
msg='slice key should apply slice to sequence')
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
msg='slice key should apply slice to sequence')
# Test alternative paths
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
msg='multiple `paths` should be treated as alternative paths')
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
msg='alternatives should exit early')
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
msg='alternatives should return `default` if exhausted')
self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
msg='alternatives should track their own branching return')
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
msg='alternatives on empty objects should search further')
# Test branch and path nesting
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
msg='tuple as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
msg='list as key should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
msg='double nesting in path should be treated as paths')
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
msg='do not fail early on branching')
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='triple nesting in path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
['https://www.example.com/0', 'https://www.example.com/1'],
msg='ellipsis as branch path start gets flattened')
# Test dictionary as key
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
msg='dict key should result in a dict with the same keys')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
{0: 'https://www.example.com/0'},
msg='dict key should allow paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
{0: ['https://www.example.com/0']},
msg='tuple in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
{0: ['https://www.example.com/0']},
msg='double nesting in dict path should be treated as paths')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
msg='triple nesting in dict path should be treated as branches')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
msg='remove `None` values when top level dict key fails')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
msg='use `default` if key fails and `default`')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
msg='remove empty values when dict key')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
msg='use `default` when dict key and a default')
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
msg='remove empty values when nested dict key fails')
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
msg='default to dict if pruned')
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
msg='default to dict if pruned and default is given')
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=Ellipsis), {0: {0: Ellipsis}},
msg='use nested `default` when nested dict key fails and `default`')
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {},
msg='remove key if branch in dict key not successful')
# Testing default parameter behavior
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
msg='default value should be `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
msg='chained fails should result in default')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
msg='should not short cirquit on `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
msg='invalid dict key should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
msg='`None` is a deliberate sentinel and should become `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
msg='`IndexError` should result in `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
msg='if branched but not successful return `default` if defined, not `[]`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
msg='if branched but not successful return `default` even if `default` is `None`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
msg='if branched but not successful return `[]`, not `default`')
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
msg='if branched but object is empty return `[]`, not `default`')
self.assertEqual(traverse_obj(None, Ellipsis), [],
msg='if branched but object is `None` return `[]`, not `default`')
self.assertEqual(traverse_obj({0: None}, (0, Ellipsis)), [],
msg='if branched but state is `None` return `[]`, not `default`')
branching_paths = [
('fail', Ellipsis),
(Ellipsis, 'fail'),
100 * ('fail',) + (Ellipsis,),
(Ellipsis,) + 100 * ('fail',),
]
for branching_path in branching_paths:
self.assertEqual(traverse_obj({}, branching_path), [],
msg='if branched but state is `None`, return `[]` (not `default`)')
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
msg='if branching in last alternative and previous did match, return single value')
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
msg='if branching in first alternative and non-branching path does match, return single value')
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
msg='if branching in first alternative and non-branching path does not match, return `default`')
# Testing expected_type behavior
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
'str', msg='accept matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
None, msg='reject non-matching `expected_type` type')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
'0', msg='transform type using type function')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
None, msg='wrap expected_type function in try_call')
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=str),
['str'], msg='eliminate items that expected_type fails on')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
{0: 100}, msg='type as expected_type should filter dict values')
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
1, msg='expected_type should not filter non-final dict values')
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
{0: {0: 100}}, msg='expected_type should transform deep dict values')
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
[{0: Ellipsis}, {0: Ellipsis}], msg='expected_type should transform branched dict values')
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
[4], msg='expected_type regression for type matching in tuple branching')
self.assertEqual(traverse_obj(_TEST_DATA, ['data', Ellipsis], expected_type=int),
[], msg='expected_type regression for type matching in dict result')
# Test get_all behavior
_GET_ALL_DATA = {'key': [0, 1, 2]}
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
msg='if not `get_all`, return only first matching value')
self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
msg='do not overflatten if not `get_all`')
# Test casesense behavior
_CASESENSE_DATA = {
'KeY': 'value0',
0: {
'KeY': 'value1',
0: {'KeY': 'value2'},
},
# FULLWIDTH LATIN CAPITAL LETTER K
'\uff2bey': 'value3',
}
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
msg='dict keys should be case sensitive unless `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
casesense=False), 'value0',
msg='allow non matching key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey', # FULLWIDTH LATIN SMALL LETTER K
casesense=False), 'value3',
msg='allow non matching Unicode key case if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
casesense=False), ['value1'],
msg='allow non matching key case in branch if `casesense`')
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
casesense=False), ['value2'],
msg='allow non matching key case in branch path if `casesense`')
# Test traverse_string behavior
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
msg='do not traverse into string if not `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
_traverse_string=True), 's',
msg='traverse into string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
_traverse_string=True), '.',
msg='traverse into converted data if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
_traverse_string=True), 'str',
msg='`...` should result in string (same value) if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
_traverse_string=True), 'sr',
msg='`slice` should result in string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
_traverse_string=True), 'str',
msg='function should result in string if `traverse_string`')
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
_traverse_string=True), ['s', 'r'],
msg='branching should result in list if `traverse_string`')
self.assertEqual(traverse_obj({}, (0, Ellipsis), _traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), _traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
self.assertEqual(traverse_obj({}, (0, slice(1)), _traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
_is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
_is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
_is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
_is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
# Test re.Match as input obj
mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
msg='`...` on a `re.Match` should give its `groups()`')
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
msg='function on a `re.Match` should give groupno, value starting at 0')
self.assertEqual(traverse_obj(mobj, 'group'), '3',
msg='str key on a `re.Match` should give group with that name')
self.assertEqual(traverse_obj(mobj, 2), '3',
msg='int key on a `re.Match` should give group with that name')
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
msg='str key on a `re.Match` should respect casesense')
self.assertEqual(traverse_obj(mobj, 'fail'), None,
msg='failing str key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
msg='failing str key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, 8), None,
msg='failing int key on a `re.Match` should return `default`')
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
msg='function on a `re.Match` should give group name as well')
def test_get_first(self):
self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
def test_join_nonempty(self): def test_join_nonempty(self):
self.assertEqual(join_nonempty('a', 'b'), 'a-b') self.assertEqual(join_nonempty('a', 'b'), 'a-b')
self.assertEqual(join_nonempty( self.assertEqual(join_nonempty(

View File

@ -158,6 +158,30 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
), ),
(
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
),
(
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
),
(
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
),
(
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
] ]

35
test/testdata/mpd/range_only.mpd vendored Normal file
View File

@ -0,0 +1,35 @@
<?xml version="1.0"?>
<!-- MPD file Generated with GPAC version 1.0.1-revrelease at 2021-11-27T20:53:11.690Z -->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTime="PT1.500S" type="static" mediaPresentationDuration="PT0H0M30.196S" maxSegmentDuration="PT0H0M10.027S" profiles="urn:mpeg:dash:profile:full:2011">
<ProgramInformation moreInformationURL="http://gpac.io">
<Title>manifest.mpd generated by GPAC</Title>
</ProgramInformation>
<Period duration="PT0H0M30.196S">
<AdaptationSet segmentAlignment="true" maxWidth="768" maxHeight="432" maxFrameRate="30000/1001" par="16:9" lang="und" startWithSAP="1">
<Representation id="1" mimeType="video/mp4" codecs="avc1.4D401E" width="768" height="432" frameRate="30000/1001" sar="1:1" bandwidth="526987">
<BaseURL>video_dashinit.mp4</BaseURL>
<SegmentList timescale="90000" duration="900000">
<Initialization range="0-881"/>
<SegmentURL mediaRange="882-876094" indexRange="882-925"/>
<SegmentURL mediaRange="876095-1466732" indexRange="876095-876138"/>
<SegmentURL mediaRange="1466733-1953615" indexRange="1466733-1466776"/>
<SegmentURL mediaRange="1953616-1994211" indexRange="1953616-1953659"/>
</SegmentList>
</Representation>
</AdaptationSet>
<AdaptationSet segmentAlignment="true" lang="und" startWithSAP="1">
<Representation id="2" mimeType="audio/mp4" codecs="mp4a.40.2" audioSamplingRate="48000" bandwidth="98096">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>audio_dashinit.mp4</BaseURL>
<SegmentList timescale="48000" duration="480000">
<Initialization range="0-752"/>
<SegmentURL mediaRange="753-124129" indexRange="753-796"/>
<SegmentURL mediaRange="124130-250544" indexRange="124130-124173"/>
<SegmentURL mediaRange="250545-374929" indexRange="250545-250588"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

351
test/testdata/mpd/subtitles.mpd vendored Normal file
View File

@ -0,0 +1,351 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
<MPD
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="urn:mpeg:dash:schema:mpd:2011"
xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
type="static"
mediaPresentationDuration="PT14M48S"
maxSegmentDuration="PT1M"
minBufferTime="PT10S"
profiles="urn:mpeg:dash:profile:isoff-live:2011">
<Period
id="1"
duration="PT14M48S">
<BaseURL>dash/</BaseURL>
<AdaptationSet
id="1"
group="1"
contentType="audio"
segmentAlignment="true"
audioSamplingRate="48000"
mimeType="audio/mp4"
codecs="mp4a.40.2"
startWithSAP="1">
<AudioChannelConfiguration
schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
value="2" />
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate
timescale="48000"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="3584" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="audio=128001"
bandwidth="128001">
</Representation>
</AdaptationSet>
<AdaptationSet
id="2"
group="3"
contentType="text"
lang="en"
mimeType="application/mp4"
codecs="stpp"
startWithSAP="1">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
<SegmentTemplate
timescale="1000"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="60000" r="9" />
<S d="24000" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="textstream_eng=1000"
bandwidth="1000">
</Representation>
</AdaptationSet>
<AdaptationSet
id="3"
group="2"
contentType="video"
par="960:409"
minBandwidth="100000"
maxBandwidth="4482000"
maxWidth="1689"
maxHeight="720"
segmentAlignment="true"
mimeType="video/mp4"
codecs="avc1.4D401F"
startWithSAP="1">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate
timescale="12288"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="24576" r="443" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="video=100000"
bandwidth="100000"
width="336"
height="144"
sar="2880:2863"
scanType="progressive">
</Representation>
<Representation
id="video=326000"
bandwidth="326000"
width="562"
height="240"
sar="115200:114929"
scanType="progressive">
</Representation>
<Representation
id="video=698000"
bandwidth="698000"
width="844"
height="360"
sar="86400:86299"
scanType="progressive">
</Representation>
<Representation
id="video=1493000"
bandwidth="1493000"
width="1126"
height="480"
sar="230400:230267"
scanType="progressive">
</Representation>
<Representation
id="video=4482000"
bandwidth="4482000"
width="1688"
height="720"
sar="86400:86299"
scanType="progressive">
</Representation>
</AdaptationSet>
</Period>
</MPD>

32
test/testdata/mpd/url_and_range.mpd vendored Normal file
View File

@ -0,0 +1,32 @@
<?xml version="1.0" ?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT10.01S" mediaPresentationDuration="PT30.097S" type="static">
<!-- Created with Bento4 mp4-dash.py, VERSION=2.0.0-639 -->
<Period>
<!-- Video -->
<AdaptationSet mimeType="video/mp4" segmentAlignment="true" startWithSAP="1" maxWidth="768" maxHeight="432">
<Representation id="video-avc1" codecs="avc1.4D401E" width="768" height="432" scanType="progressive" frameRate="30000/1001" bandwidth="699597">
<SegmentList timescale="1000" duration="10010">
<Initialization sourceURL="video-frag.mp4" range="36-746"/>
<SegmentURL media="video-frag.mp4" mediaRange="747-876117"/>
<SegmentURL media="video-frag.mp4" mediaRange="876118-1466913"/>
<SegmentURL media="video-frag.mp4" mediaRange="1466914-1953954"/>
<SegmentURL media="video-frag.mp4" mediaRange="1953955-1994652"/>
</SegmentList>
</Representation>
</AdaptationSet>
<!-- Audio -->
<AdaptationSet mimeType="audio/mp4" startWithSAP="1" segmentAlignment="true">
<Representation id="audio-und-mp4a.40.2" codecs="mp4a.40.2" bandwidth="98808" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:mpegB:cicp:ChannelConfiguration" value="2"/>
<SegmentList timescale="1000" duration="10010">
<Initialization sourceURL="audio-frag.mp4" range="32-623"/>
<SegmentURL media="audio-frag.mp4" mediaRange="624-124199"/>
<SegmentURL media="audio-frag.mp4" mediaRange="124200-250303"/>
<SegmentURL media="audio-frag.mp4" mediaRange="250304-374365"/>
<SegmentURL media="audio-frag.mp4" mediaRange="374366-374836"/>
</SegmentList>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -7,6 +7,7 @@ import collections
import copy import copy
import datetime import datetime
import errno import errno
import functools
import io import io
import itertools import itertools
import json import json
@ -53,6 +54,7 @@ from .compat import (
compat_urllib_request_DataHandler, compat_urllib_request_DataHandler,
) )
from .utils import ( from .utils import (
_UnsafeExtensionError,
age_restricted, age_restricted,
args_to_str, args_to_str,
bug_reports_message, bug_reports_message,
@ -129,6 +131,20 @@ if compat_os_name == 'nt':
import ctypes import ctypes
def _catch_unsafe_file_extension(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except _UnsafeExtensionError as error:
self.report_error(
'{0} found; to avoid damaging your system, this value is disallowed.'
' If you believe this is an error{1}'.format(
error_to_compat_str(error), bug_reports_message(',')))
return wrapper
class YoutubeDL(object): class YoutubeDL(object):
"""YoutubeDL class. """YoutubeDL class.
@ -1039,8 +1055,8 @@ class YoutubeDL(object):
elif result_type in ('playlist', 'multi_video'): elif result_type in ('playlist', 'multi_video'):
# Protect from infinite recursion due to recursively nested playlists # Protect from infinite recursion due to recursively nested playlists
# (see https://github.com/ytdl-org/youtube-dl/issues/27833) # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
webpage_url = ie_result['webpage_url'] webpage_url = ie_result.get('webpage_url') # not all pl/mv have this
if webpage_url in self._playlist_urls: if webpage_url and webpage_url in self._playlist_urls:
self.to_screen( self.to_screen(
'[download] Skipping already downloaded playlist: %s' '[download] Skipping already downloaded playlist: %s'
% ie_result.get('title') or ie_result.get('id')) % ie_result.get('title') or ie_result.get('id'))
@ -1048,6 +1064,10 @@ class YoutubeDL(object):
self._playlist_level += 1 self._playlist_level += 1
self._playlist_urls.add(webpage_url) self._playlist_urls.add(webpage_url)
new_result = dict((k, v) for k, v in extra_info.items() if k not in ie_result)
if new_result:
new_result.update(ie_result)
ie_result = new_result
try: try:
return self.__process_playlist(ie_result, download) return self.__process_playlist(ie_result, download)
finally: finally:
@ -1593,6 +1613,28 @@ class YoutubeDL(object):
self.cookiejar.add_cookie_header(pr) self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie') return pr.get_header('Cookie')
def _fill_common_fields(self, info_dict, final=True):
for ts_key, date_key in (
('timestamp', 'upload_date'),
('release_timestamp', 'release_date'),
):
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
# see http://bugs.python.org/issue1646728)
try:
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
except (ValueError, OverflowError, OSError):
pass
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
if final:
for field in ('chapter', 'season', 'episode'):
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
def process_video_result(self, info_dict, download=True): def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'
@ -1660,24 +1702,7 @@ class YoutubeDL(object):
if 'display_id' not in info_dict and 'id' in info_dict: if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id'] info_dict['display_id'] = info_dict['id']
for ts_key, date_key in ( self._fill_common_fields(info_dict)
('timestamp', 'upload_date'),
('release_timestamp', 'release_date'),
):
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
# see http://bugs.python.org/issue1646728)
try:
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
except (ValueError, OverflowError, OSError):
pass
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
for field in ('chapter', 'season', 'episode'):
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
for cc_kind in ('subtitles', 'automatic_captions'): for cc_kind in ('subtitles', 'automatic_captions'):
cc = info_dict.get(cc_kind) cc = info_dict.get(cc_kind)
@ -1916,6 +1941,7 @@ class YoutubeDL(object):
if self.params.get('forcejson', False): if self.params.get('forcejson', False):
self.to_stdout(json.dumps(self.sanitize_info(info_dict))) self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
@_catch_unsafe_file_extension
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single resolved IE result.""" """Process a single resolved IE result."""
@ -2088,18 +2114,26 @@ class YoutubeDL(object):
# TODO: Check acodec/vcodec # TODO: Check acodec/vcodec
return False return False
filename_real_ext = os.path.splitext(filename)[1][1:] exts = [info_dict['ext']]
filename_wo_ext = (
os.path.splitext(filename)[0]
if filename_real_ext == info_dict['ext']
else filename)
requested_formats = info_dict['requested_formats'] requested_formats = info_dict['requested_formats']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv' info_dict['ext'] = 'mkv'
self.report_warning( self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.') 'Requested formats are incompatible for merge and will be merged into mkv.')
exts.append(info_dict['ext'])
# Ensure filename always has a correct extension for successful merge # Ensure filename always has a correct extension for successful merge
filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) def correct_ext(filename, ext=exts[1]):
if filename == '-':
return filename
f_name, f_real_ext = os.path.splitext(filename)
f_real_ext = f_real_ext[1:]
filename_wo_ext = f_name if f_real_ext in exts else filename
if ext is None:
ext = f_real_ext or None
return join_nonempty(filename_wo_ext, ext, delim='.')
filename = correct_ext(filename)
if os.path.exists(encodeFilename(filename)): if os.path.exists(encodeFilename(filename)):
self.to_screen( self.to_screen(
'[download] %s has already been downloaded and ' '[download] %s has already been downloaded and '
@ -2109,8 +2143,9 @@ class YoutubeDL(object):
new_info = dict(info_dict) new_info = dict(info_dict)
new_info.update(f) new_info.update(f)
fname = prepend_extension( fname = prepend_extension(
self.prepare_filename(new_info), correct_ext(
'f%s' % f['format_id'], new_info['ext']) self.prepare_filename(new_info), new_info['ext']),
'f%s' % (f['format_id'],), new_info['ext'])
if not ensure_dir_exists(fname): if not ensure_dir_exists(fname):
return return
downloaded.append(fname) downloaded.append(fname)
@ -2635,12 +2670,12 @@ class YoutubeDL(object):
self.to_screen(msg('[info] %s is already present', label.title())) self.to_screen(msg('[info] %s is already present', label.title()))
return 'exists' return 'exists'
else: else:
self.to_screen(msg('[info] Writing %s as JSON to: ' + infofn, label)) self.to_screen(msg('[info] Writing %s as JSON to: ', label) + infofn)
try: try:
write_json_file(self.filter_requested_info(info_dict), infofn) write_json_file(self.filter_requested_info(info_dict), infofn)
return True return True
except (OSError, IOError): except (OSError, IOError):
self.report_error(msg('Cannot write %s to JSON file ' + infofn, label)) self.report_error(msg('Cannot write %s to JSON file ', label) + infofn)
return return
def _write_thumbnails(self, info_dict, filename): def _write_thumbnails(self, info_dict, filename):

View File

@ -21,6 +21,7 @@ from .compat import (
workaround_optparse_bug9161, workaround_optparse_bug9161,
) )
from .utils import ( from .utils import (
_UnsafeExtensionError,
DateRange, DateRange,
decodeOption, decodeOption,
DEFAULT_OUTTMPL, DEFAULT_OUTTMPL,
@ -173,6 +174,9 @@ def _real_main(argv=None):
if opts.ap_mso and opts.ap_mso not in MSO_INFO: if opts.ap_mso and opts.ap_mso not in MSO_INFO:
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
if opts.no_check_extensions:
_UnsafeExtensionError.lenient = True
def parse_retries(retries): def parse_retries(retries):
if retries in ('inf', 'infinite'): if retries in ('inf', 'infinite'):
parsed_retries = float('inf') parsed_retries = float('inf')

View File

@ -58,19 +58,26 @@ except ImportError: # Python 2
# Also fix up lack of method arg in old Pythons # Also fix up lack of method arg in old Pythons
try: try:
_req = compat_urllib_request.Request type(compat_urllib_request.Request('http://127.0.0.1', method='GET'))
_req('http://127.0.0.1', method='GET')
except TypeError: except TypeError:
class _request(object): def _add_init_method_arg(cls):
def __new__(cls, url, *args, **kwargs):
method = kwargs.pop('method', None)
r = _req(url, *args, **kwargs)
if method:
r.get_method = types.MethodType(lambda _: method, r)
return r
compat_urllib_request.Request = _request init = cls.__init__
def wrapped_init(self, *args, **kwargs):
method = kwargs.pop('method', 'GET')
init(self, *args, **kwargs)
if any(callable(x.__dict__.get('get_method')) for x in (self.__class__, self) if x != cls):
# allow instance or its subclass to override get_method()
return
if self.has_data() and method == 'GET':
method = 'POST'
self.get_method = types.MethodType(lambda _: method, self)
cls.__init__ = wrapped_init
_add_init_method_arg(compat_urllib_request.Request)
del _add_init_method_arg
try: try:
import urllib.error as compat_urllib_error import urllib.error as compat_urllib_error
@ -2414,29 +2421,26 @@ except ImportError: # Python 2
compat_urllib_request_urlretrieve = compat_urlretrieve compat_urllib_request_urlretrieve = compat_urlretrieve
try: try:
from HTMLParser import (
HTMLParser as compat_HTMLParser,
HTMLParseError as compat_HTMLParseError)
except ImportError: # Python 3
from html.parser import HTMLParser as compat_HTMLParser from html.parser import HTMLParser as compat_HTMLParser
except ImportError: # Python 2
from HTMLParser import HTMLParser as compat_HTMLParser
compat_html_parser_HTMLParser = compat_HTMLParser
try: # Python 2
from HTMLParser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python <3.4
try: try:
from html.parser import HTMLParseError as compat_HTMLParseError from html.parser import HTMLParseError as compat_HTMLParseError
except ImportError: # Python >3.4 except ImportError: # Python >3.4
# HTMLParseError was deprecated in Python 3.3 and removed in
# HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass
compat_html_parser_HTMLParser = compat_HTMLParser
compat_html_parser_HTMLParseError = compat_HTMLParseError compat_html_parser_HTMLParseError = compat_HTMLParseError
try: try:
from subprocess import DEVNULL _DEVNULL = subprocess.DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_subprocess_get_DEVNULL = lambda: _DEVNULL
except ImportError: except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
try: try:
@ -2715,8 +2719,222 @@ if sys.version_info < (2, 7):
if isinstance(xpath, compat_str): if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii') xpath = xpath.encode('ascii')
return xpath return xpath
# further code below based on CPython 2.7 source
import functools
_xpath_tokenizer_re = re.compile(r'''(?x)
( # (1)
'[^']*'|"[^"]*"| # quoted strings, or
::|//?|\.\.|\(\)|[/.*:[\]()@=] # navigation specials
)| # or (2)
((?:\{[^}]+\})?[^/[\]()@=\s]+)| # token: optional {ns}, no specials
\s+ # or white space
''')
def _xpath_tokenizer(pattern, namespaces=None):
for token in _xpath_tokenizer_re.findall(pattern):
tag = token[1]
if tag and tag[0] != "{" and ":" in tag:
try:
if not namespaces:
raise KeyError
prefix, uri = tag.split(":", 1)
yield token[0], "{%s}%s" % (namespaces[prefix], uri)
except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix)
else:
yield token
def _get_parent_map(context):
parent_map = context.parent_map
if parent_map is None:
context.parent_map = parent_map = {}
for p in context.root.getiterator():
for e in p:
parent_map[e] = p
return parent_map
def _select(context, result, filter_fn=lambda *_: True):
for elem in result:
for e in elem:
if filter_fn(e, elem):
yield e
def _prepare_child(next_, token):
tag = token[1]
return functools.partial(_select, filter_fn=lambda e, _: e.tag == tag)
def _prepare_star(next_, token):
return _select
def _prepare_self(next_, token):
return lambda _, result: (e for e in result)
def _prepare_descendant(next_, token):
token = next(next_)
if token[0] == "*":
tag = "*"
elif not token[0]:
tag = token[1]
else:
raise SyntaxError("invalid descendant")
def select(context, result):
for elem in result:
for e in elem.getiterator(tag):
if e is not elem:
yield e
return select
def _prepare_parent(next_, token):
def select(context, result):
# FIXME: raise error if .. is applied at toplevel?
parent_map = _get_parent_map(context)
result_map = {}
for elem in result:
if elem in parent_map:
parent = parent_map[elem]
if parent not in result_map:
result_map[parent] = None
yield parent
return select
def _prepare_predicate(next_, token):
signature = []
predicate = []
for token in next_:
if token[0] == "]":
break
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
predicate.append(token[1])
def select(context, result, filter_fn=lambda _: True):
for elem in result:
if filter_fn(elem):
yield elem
signature = "".join(signature)
# use signature to determine predicate type
if signature == "@-":
# [@attribute] predicate
key = predicate[1]
return functools.partial(
select, filter_fn=lambda el: el.get(key) is not None)
if signature == "@-='":
# [@attribute='value']
key = predicate[1]
value = predicate[-1]
return functools.partial(
select, filter_fn=lambda el: el.get(key) == value)
if signature == "-" and not re.match(r"\d+$", predicate[0]):
# [tag]
tag = predicate[0]
return functools.partial(
select, filter_fn=lambda el: el.find(tag) is not None)
if signature == "-='" and not re.match(r"\d+$", predicate[0]):
# [tag='value']
tag = predicate[0]
value = predicate[-1]
def itertext(el):
for e in el.getiterator():
e = e.text
if e:
yield e
def select(context, result):
for elem in result:
for e in elem.findall(tag):
if "".join(itertext(e)) == value:
yield elem
break
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]
if signature == "-":
index = int(predicate[0]) - 1
else:
if predicate[0] != "last":
raise SyntaxError("unsupported function")
if signature == "-()-":
try:
index = int(predicate[2]) - 1
except ValueError:
raise SyntaxError("unsupported expression")
else:
index = -1
def select(context, result):
parent_map = _get_parent_map(context)
for elem in result:
try:
parent = parent_map[elem]
# FIXME: what if the selector is "*" ?
elems = list(parent.findall(elem.tag))
if elems[index] is elem:
yield elem
except (IndexError, KeyError):
pass
return select
raise SyntaxError("invalid predicate")
ops = {
"": _prepare_child,
"*": _prepare_star,
".": _prepare_self,
"..": _prepare_parent,
"//": _prepare_descendant,
"[": _prepare_predicate,
}
_cache = {}
class _SelectorContext:
parent_map = None
def __init__(self, root):
self.root = root
##
# Generate all matching objects.
def compat_etree_iterfind(elem, path, namespaces=None):
# compile selector pattern
if path[-1:] == "/":
path = path + "*" # implicit all (FIXME: keep this?)
try:
selector = _cache[path]
except KeyError:
if len(_cache) > 100:
_cache.clear()
if path[:1] == "/":
raise SyntaxError("cannot use absolute path on element")
tokens = _xpath_tokenizer(path, namespaces)
selector = []
for token in tokens:
if token[0] == "/":
continue
try:
selector.append(ops[token[0]](tokens, token))
except StopIteration:
raise SyntaxError("invalid path")
_cache[path] = selector
# execute selector pattern
result = [elem]
context = _SelectorContext(elem)
for select in selector:
result = select(context, result)
return result
# end of code based on CPython 2.7 source
else: else:
compat_xpath = lambda xpath: xpath compat_xpath = lambda xpath: xpath
compat_etree_iterfind = lambda element, match: element.iterfind(match)
compat_os_name = os._name if os.name == 'java' else os.name compat_os_name = os._name if os.name == 'java' else os.name
@ -2752,7 +2970,7 @@ except (AssertionError, UnicodeEncodeError):
def compat_ord(c): def compat_ord(c):
if type(c) is int: if isinstance(c, int):
return c return c
else: else:
return ord(c) return ord(c)
@ -2936,6 +3154,51 @@ else:
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
try:
from contextlib import suppress as compat_contextlib_suppress
except ImportError:
class compat_contextlib_suppress(object):
_exceptions = None
def __init__(self, *exceptions):
super(compat_contextlib_suppress, self).__init__()
# TODO: [Base]ExceptionGroup (3.12+)
self._exceptions = exceptions
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
return exc_type is not None and issubclass(exc_type, self._exceptions or tuple())
# subprocess.Popen context manager
# avoids leaking handles if .communicate() is not called
try:
_Popen = subprocess.Popen
# check for required context manager attributes
_Popen.__enter__ and _Popen.__exit__
compat_subprocess_Popen = _Popen
except AttributeError:
# not a context manager - make one
from contextlib import contextmanager
@contextmanager
def compat_subprocess_Popen(*args, **kwargs):
popen = None
try:
popen = _Popen(*args, **kwargs)
yield popen
finally:
if popen:
for f in (popen.stdin, popen.stdout, popen.stderr):
if f:
# repeated .close() is OK, but just in case
with compat_contextlib_suppress(EnvironmentError):
f.close()
popen.wait()
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken # See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161(): def workaround_optparse_bug9161():
@ -3256,8 +3519,10 @@ __all__ = [
'compat_http_cookiejar_Cookie', 'compat_http_cookiejar_Cookie',
'compat_http_cookies', 'compat_http_cookies',
'compat_http_cookies_SimpleCookie', 'compat_http_cookies_SimpleCookie',
'compat_contextlib_suppress',
'compat_ctypes_WINFUNCTYPE', 'compat_ctypes_WINFUNCTYPE',
'compat_etree_fromstring', 'compat_etree_fromstring',
'compat_etree_iterfind',
'compat_filter', 'compat_filter',
'compat_get_terminal_size', 'compat_get_terminal_size',
'compat_getenv', 'compat_getenv',
@ -3291,6 +3556,7 @@ __all__ = [
'compat_struct_pack', 'compat_struct_pack',
'compat_struct_unpack', 'compat_struct_unpack',
'compat_subprocess_get_DEVNULL', 'compat_subprocess_get_DEVNULL',
'compat_subprocess_Popen',
'compat_tokenize_tokenize', 'compat_tokenize_tokenize',
'compat_urllib_error', 'compat_urllib_error',
'compat_urllib_parse', 'compat_urllib_parse',

View File

@ -35,6 +35,7 @@ class DashSegmentsFD(FragmentFD):
for frag_index, fragment in enumerate(fragments, 1): for frag_index, fragment in enumerate(fragments, 1):
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
success = False
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment # generate a valid MP4 file, so always abort for the first segment
fatal = frag_index == 1 or not skip_unavailable_fragments fatal = frag_index == 1 or not skip_unavailable_fragments
@ -42,10 +43,14 @@ class DashSegmentsFD(FragmentFD):
if not fragment_url: if not fragment_url:
assert fragment_base_url assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path']) fragment_url = urljoin(fragment_base_url, fragment['path'])
success = False headers = info_dict.get('http_headers')
fragment_range = fragment.get('range')
if fragment_range:
headers = headers.copy() if headers else {}
headers['Range'] = 'bytes=%s' % (fragment_range,)
for count in itertools.count(): for count in itertools.count():
try: try:
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) success, frag_content = self._download_fragment(ctx, fragment_url, info_dict, headers)
if not success: if not success:
return False return False
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)

View File

@ -11,8 +11,14 @@ from .common import FileDownloader
from ..compat import ( from ..compat import (
compat_setenv, compat_setenv,
compat_str, compat_str,
compat_subprocess_Popen,
) )
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
try:
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
except ImportError:
FFmpegPostProcessor = None
from ..utils import ( from ..utils import (
cli_option, cli_option,
cli_valueless_option, cli_valueless_option,
@ -206,7 +212,10 @@ class WgetFD(ExternalFD):
retry[1] = '0' retry[1] = '0'
cmd += retry cmd += retry
cmd += self._option('--bind-address', 'source_address') cmd += self._option('--bind-address', 'source_address')
cmd += self._option('--proxy', 'proxy') proxy = self.params.get('proxy')
if proxy:
for var in ('http_proxy', 'https_proxy'):
cmd += ['--execute', '%s=%s' % (var, proxy)]
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args() cmd += self._configuration_args()
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
@ -358,13 +367,14 @@ class FFmpegFD(ExternalFD):
@classmethod @classmethod
def available(cls): def available(cls):
return FFmpegPostProcessor().available # actual availability can only be confirmed for an instance
return bool(FFmpegPostProcessor)
def _call_downloader(self, tmpfilename, info_dict): def _call_downloader(self, tmpfilename, info_dict):
url = info_dict['url'] # `downloader` means the parent `YoutubeDL`
ffpp = FFmpegPostProcessor(downloader=self) ffpp = FFmpegPostProcessor(downloader=self.ydl)
if not ffpp.available: if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') self.report_error('ffmpeg required for download but no ffmpeg (nor avconv) executable could be found. Please install one.')
return False return False
ffpp.check_version() ffpp.check_version()
@ -393,6 +403,7 @@ class FFmpegFD(ExternalFD):
# if end_time: # if end_time:
# args += ['-t', compat_str(end_time - start_time)] # args += ['-t', compat_str(end_time - start_time)]
url = info_dict['url']
cookies = self.ydl.cookiejar.get_cookies_for_url(url) cookies = self.ydl.cookiejar.get_cookies_for_url(url)
if cookies: if cookies:
args.extend(['-cookies', ''.join( args.extend(['-cookies', ''.join(
@ -480,7 +491,12 @@ class FFmpegFD(ExternalFD):
self._debug_cmd(args) self._debug_cmd(args)
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) # From [1], a PIPE opened in Popen() should be closed, unless
# .communicate() is called. Avoid leaking any PIPEs by using Popen
# as a context manager (newer Python 3.x and compat)
# Fixes "Resource Warning" in test/test_downloader_external.py
# [1] https://devpress.csdn.net/python/62fde12d7e66823466192e48.html
with compat_subprocess_Popen(args, stdin=subprocess.PIPE, env=env) as proc:
try: try:
retval = proc.wait() retval = proc.wait()
except BaseException as e: except BaseException as e:
@ -493,7 +509,6 @@ class FFmpegFD(ExternalFD):
process_communicate_or_kill(proc, b'q') process_communicate_or_kill(proc, b'q')
else: else:
proc.kill() proc.kill()
proc.wait()
raise raise
return retval return retval

View File

@ -0,0 +1,79 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
merge_dicts,
parse_iso8601,
T,
traverse_obj,
txt_or_none,
urljoin,
)
class CaffeineTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/]+/video/(?P<id>[0-9a-f-]+)'
_TESTS = [{
'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
'info_dict': {
'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
'ext': 'mp4',
'title': 'GOOOOD MORNINNNNN #highlights',
'timestamp': 1654702180,
'upload_date': '20220608',
'uploader': 'TsuSurf',
'duration': 3145,
'age_limit': 17,
},
'params': {
'format': 'bestvideo',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json(
'https://api.caffeine.tv/social/public/activity/' + video_id,
video_id)
broadcast_info = traverse_obj(json_data, ('broadcast_info', T(dict))) or {}
title = broadcast_info['broadcast_title']
video_url = broadcast_info['video_url']
ext = determine_ext(video_url)
if ext == 'm3u8':
formats = self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8',
fatal=False)
else:
formats = [{'url': video_url}]
self._sort_formats(formats)
return merge_dicts({
'id': video_id,
'title': title,
'formats': formats,
}, traverse_obj(json_data, {
'uploader': ((None, 'user'), 'username'),
}, get_all=False), traverse_obj(json_data, {
'like_count': ('like_count', T(int_or_none)),
'view_count': ('view_count', T(int_or_none)),
'comment_count': ('comment_count', T(int_or_none)),
'tags': ('tags', Ellipsis, T(txt_or_none)),
'is_live': 'is_live',
'uploader': ('user', 'name'),
}), traverse_obj(broadcast_info, {
'duration': ('content_duration', T(int_or_none)),
'timestamp': ('broadcast_start_time', T(parse_iso8601)),
'thumbnail': ('preview_image_path', T(lambda u: urljoin(url, u))),
'age_limit': ('content_rating', T(lambda r: r and {
# assume Apple Store ratings [1]
# 1. https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
'FOUR_PLUS': 0,
'NINE_PLUS': 9,
'TWELVE_PLUS': 12,
'SEVENTEEN_PLUS': 17,
}.get(r, 17))),
}))

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import base64 import base64
import collections
import datetime import datetime
import functools import functools
import hashlib import hashlib
@ -24,6 +25,7 @@ from ..compat import (
compat_getpass, compat_getpass,
compat_integer_types, compat_integer_types,
compat_http_client, compat_http_client,
compat_kwargs,
compat_map as map, compat_map as map,
compat_open as open, compat_open as open,
compat_os_name, compat_os_name,
@ -58,6 +60,7 @@ from ..utils import (
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
JSON_LD_RE, JSON_LD_RE,
mimetype2ext, mimetype2ext,
@ -74,6 +77,7 @@ from ..utils import (
str_or_none, str_or_none,
str_to_int, str_to_int,
strip_or_none, strip_or_none,
T,
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
@ -180,6 +184,8 @@ class InfoExtractor(object):
fragment_base_url fragment_base_url
* "duration" (optional, int or float) * "duration" (optional, int or float)
* "filesize" (optional, int) * "filesize" (optional, int)
* "range" (optional, str of the form "start-end"
to use in HTTP Range header)
* preference Order number of this format. If this field is * preference Order number of this format. If this field is
present and not None, the formats get sorted present and not None, the formats get sorted
by this field, regardless of all other values. by this field, regardless of all other values.
@ -596,6 +602,14 @@ class InfoExtractor(object):
"""Sets the downloader for this IE.""" """Sets the downloader for this IE."""
self._downloader = downloader self._downloader = downloader
@property
def cache(self):
return self._downloader.cache
@property
def cookiejar(self):
return self._downloader.cookiejar
def _real_initialize(self): def _real_initialize(self):
"""Real initialization process. Redefine in subclasses.""" """Real initialization process. Redefine in subclasses."""
pass pass
@ -942,14 +956,47 @@ class InfoExtractor(object):
else: else:
self.report_warning(errmsg + str(ve)) self.report_warning(errmsg + str(ve))
def report_warning(self, msg, video_id=None): def __ie_msg(self, *msg):
return '[{0}] {1}'.format(self.IE_NAME, ''.join(msg))
# msg, video_id=None, *args, only_once=False, **kwargs
def report_warning(self, msg, *args, **kwargs):
if len(args) > 0:
video_id = args[0]
args = args[1:]
else:
video_id = kwargs.pop('video_id', None)
idstr = '' if video_id is None else '%s: ' % video_id idstr = '' if video_id is None else '%s: ' % video_id
self._downloader.report_warning( self._downloader.report_warning(
'[%s] %s%s' % (self.IE_NAME, idstr, msg)) self.__ie_msg(idstr, msg), *args, **kwargs)
def to_screen(self, msg): def to_screen(self, msg):
"""Print msg to screen, prefixing it with '[ie_name]'""" """Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg)) self._downloader.to_screen(self.__ie_msg(msg))
def write_debug(self, msg, only_once=False, _cache=[]):
'''Log debug message or Print message to stderr'''
if not self.get_param('verbose', False):
return
message = '[debug] ' + self.__ie_msg(msg)
logger = self.get_param('logger')
if logger:
logger.debug(message)
else:
if only_once and hash(message) in _cache:
return
self._downloader.to_stderr(message)
_cache.append(hash(message))
# name, default=None, *args, **kwargs
def get_param(self, name, *args, **kwargs):
default, args = (args[0], args[1:]) if len(args) > 0 else (kwargs.pop('default', None), args)
if self._downloader:
return self._downloader.params.get(name, default, *args, **kwargs)
return default
def report_drm(self, video_id):
self.raise_no_formats('This video is DRM protected', expected=True, video_id=video_id)
def report_extraction(self, id_or_name): def report_extraction(self, id_or_name):
"""Report information extraction.""" """Report information extraction."""
@ -977,6 +1024,15 @@ class InfoExtractor(object):
def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None): def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
raise GeoRestrictedError(msg, countries=countries) raise GeoRestrictedError(msg, countries=countries)
def raise_no_formats(self, msg, expected=False, video_id=None):
if expected and (
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg, video_id)
elif isinstance(msg, ExtractorError):
raise msg
else:
raise ExtractorError(msg, expected=expected, video_id=video_id)
# Methods for following #608 # Methods for following #608
@staticmethod @staticmethod
def url_result(url, ie=None, video_id=None, video_title=None): def url_result(url, ie=None, video_id=None, video_title=None):
@ -1047,6 +1103,60 @@ class InfoExtractor(object):
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message()) self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None return None
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
"""Searches string for the JSON object specified by start_pattern"""
# self, start_pattern, string, name, video_id, *, end_pattern='',
# contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT
# NB: end_pattern is only used to reduce the size of the initial match
end_pattern = kwargs.pop('end_pattern', '')
# (?:[\s\S]) simulates (?(s):.) (eg)
contains_pattern = kwargs.pop('contains_pattern', r'{[\s\S]+}')
fatal = kwargs.pop('fatal', True)
default = kwargs.pop('default', NO_DEFAULT)
if default is NO_DEFAULT:
default, has_default = {}, False
else:
fatal, has_default = False, True
json_string = self._search_regex(
r'(?:{0})\s*(?P<json>{1})\s*(?:{2})'.format(
start_pattern, contains_pattern, end_pattern),
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
if not json_string:
return default
# yt-dlp has a special JSON parser that allows trailing text.
# Until that arrives here, the diagnostic from the exception
# raised by json.loads() is used to extract the wanted text.
# Either way, it's a problem if a transform_source() can't
# handle the trailing text.
# force an exception
kwargs['fatal'] = True
# self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
for _ in range(2):
try:
# return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
transform_source = kwargs.pop('transform_source', None)
if transform_source:
json_string = transform_source(json_string)
return self._parse_json(json_string, video_id, **compat_kwargs(kwargs))
except ExtractorError as e:
end = int_or_none(self._search_regex(r'\(char\s+(\d+)', error_to_compat_str(e), 'end', default=None))
if end is not None:
json_string = json_string[:end]
continue
msg = 'Unable to extract {0} - Failed to parse JSON'.format(name)
if fatal:
raise ExtractorError(msg, cause=e.cause, video_id=video_id)
elif not has_default:
self.report_warning(
'{0}: {1}'.format(msg, error_to_compat_str(e)), video_id=video_id)
return default
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None): def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
""" """
Like _search_regex, but strips HTML tags and unescapes entities. Like _search_regex, but strips HTML tags and unescapes entities.
@ -1059,10 +1169,10 @@ class InfoExtractor(object):
def _get_netrc_login_info(self, netrc_machine=None): def _get_netrc_login_info(self, netrc_machine=None):
username = None username = None
password = None password = None
netrc_machine = netrc_machine or self._NETRC_MACHINE
if self._downloader.params.get('usenetrc', False): if self._downloader.params.get('usenetrc', False):
try: try:
netrc_machine = netrc_machine or self._NETRC_MACHINE
info = netrc.netrc().authenticators(netrc_machine) info = netrc.netrc().authenticators(netrc_machine)
if info is not None: if info is not None:
username = info[0] username = info[0]
@ -1070,7 +1180,7 @@ class InfoExtractor(object):
else: else:
raise netrc.NetrcParseError( raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine) 'No authenticators for %s' % netrc_machine)
except (IOError, netrc.NetrcParseError) as err: except (AttributeError, IOError, netrc.NetrcParseError) as err:
self._downloader.report_warning( self._downloader.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err)) 'parsing .netrc: %s' % error_to_compat_str(err))
@ -1380,14 +1490,18 @@ class InfoExtractor(object):
return dict((k, v) for k, v in info.items() if v is not None) return dict((k, v) for k, v in info.items() if v is not None)
def _search_nextjs_data(self, webpage, video_id, **kw): def _search_nextjs_data(self, webpage, video_id, **kw):
nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal')) # ..., *, transform_source=None, fatal=True, default=NO_DEFAULT
kw.pop('transform_source', None)
next_data = self._search_regex( # TODO: remove this backward compat
r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''', default = kw.get('default', NO_DEFAULT)
webpage, 'next.js data', group='nd', **kw) if default == '{}':
if not next_data: kw['default'] = {}
return {} kw = compat_kwargs(kw)
return self._parse_json(next_data, video_id, **nkw)
return self._search_json(
r'''<script\s[^>]*?\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>''',
webpage, 'next.js data', video_id, end_pattern='</script>',
**kw)
def _search_nuxt_data(self, webpage, video_id, *args, **kwargs): def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
@ -1701,6 +1815,12 @@ class InfoExtractor(object):
'format_note': 'Quality selection URL', 'format_note': 'Quality selection URL',
} }
def _report_ignoring_subs(self, name):
self.report_warning(bug_reports_message(
'Ignoring subtitle tracks found in the {0} manifest; '
'if any subtitle tracks are missing,'.format(name)
), only_once=True)
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None, entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None, m3u8_id=None, note=None, errnote=None,
@ -2141,23 +2261,46 @@ class InfoExtractor(object):
}) })
return entries return entries
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): def _extract_mpd_formats(self, *args, **kwargs):
fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs)
if subs:
self._report_ignoring_subs('DASH')
return fmts
def _extract_mpd_formats_and_subtitles(
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
fatal=True, data=None, headers=None, query=None):
# TODO: or not? param not yet implemented
if self.get_param('ignore_no_formats_error'):
fatal = False
res = self._download_xml_handle( res = self._download_xml_handle(
mpd_url, video_id, mpd_url, video_id,
note=note or 'Downloading MPD manifest', note='Downloading MPD manifest' if note is None else note,
errnote=errnote or 'Failed to download MPD manifest', errnote='Failed to download MPD manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query) fatal=fatal, data=data, headers=headers or {}, query=query or {})
if res is False: if res is False:
return [] return [], {}
mpd_doc, urlh = res mpd_doc, urlh = res
if mpd_doc is None: if mpd_doc is None:
return [] return [], {}
mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats( # We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.geturl()
mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url) mpd_doc, mpd_id, mpd_base_url, mpd_url)
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): def _parse_mpd_formats(self, *args, **kwargs):
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
if subs:
self._report_ignoring_subs('DASH')
return fmts
def _parse_mpd_formats_and_subtitles(
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
""" """
Parse formats from MPD manifest. Parse formats from MPD manifest.
References: References:
@ -2165,8 +2308,10 @@ class InfoExtractor(object):
http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
""" """
# TODO: param not yet implemented: default like previous yt-dl logic
if not self.get_param('dynamic_mpd', False):
if mpd_doc.get('type') == 'dynamic': if mpd_doc.get('type') == 'dynamic':
return [] return [], {}
namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None) namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
@ -2176,8 +2321,24 @@ class InfoExtractor(object):
def is_drm_protected(element): def is_drm_protected(element):
return element.find(_add_ns('ContentProtection')) is not None return element.find(_add_ns('ContentProtection')) is not None
from ..utils import YoutubeDLHandler
fix_path = YoutubeDLHandler._fix_path
def resolve_base_url(element, parent_base_url=None):
# TODO: use native XML traversal when ready
b_url = traverse_obj(element, (
T(lambda e: e.find(_add_ns('BaseURL')).text)))
if parent_base_url and b_url:
if not parent_base_url[-1] in ('/', ':'):
parent_base_url += '/'
b_url = compat_urlparse.urljoin(parent_base_url, b_url)
if b_url:
b_url = fix_path(b_url)
return b_url or parent_base_url
def extract_multisegment_info(element, ms_parent_info): def extract_multisegment_info(element, ms_parent_info):
ms_info = ms_parent_info.copy() ms_info = ms_parent_info.copy()
base_url = ms_info['base_url'] = resolve_base_url(element, ms_info.get('base_url'))
# As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
# common attributes and elements. We will only extract relevant # common attributes and elements. We will only extract relevant
@ -2211,15 +2372,27 @@ class InfoExtractor(object):
def extract_Initialization(source): def extract_Initialization(source):
initialization = source.find(_add_ns('Initialization')) initialization = source.find(_add_ns('Initialization'))
if initialization is not None: if initialization is not None:
ms_info['initialization_url'] = initialization.attrib['sourceURL'] ms_info['initialization_url'] = initialization.get('sourceURL') or base_url
initialization_url_range = initialization.get('range')
if initialization_url_range:
ms_info['initialization_url_range'] = initialization_url_range
segment_list = element.find(_add_ns('SegmentList')) segment_list = element.find(_add_ns('SegmentList'))
if segment_list is not None: if segment_list is not None:
extract_common(segment_list) extract_common(segment_list)
extract_Initialization(segment_list) extract_Initialization(segment_list)
segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
if segment_urls_e: segment_urls = traverse_obj(segment_urls_e, (
ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] Ellipsis, T(lambda e: e.attrib), 'media'))
if segment_urls:
ms_info['segment_urls'] = segment_urls
segment_urls_range = traverse_obj(segment_urls_e, (
Ellipsis, T(lambda e: e.attrib), 'mediaRange',
T(lambda r: re.findall(r'^\d+-\d+$', r)), 0))
if segment_urls_range:
ms_info['segment_urls_range'] = segment_urls_range
if not segment_urls:
ms_info['segment_urls'] = [base_url for _ in segment_urls_range]
else: else:
segment_template = element.find(_add_ns('SegmentTemplate')) segment_template = element.find(_add_ns('SegmentTemplate'))
if segment_template is not None: if segment_template is not None:
@ -2235,17 +2408,20 @@ class InfoExtractor(object):
return ms_info return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats = [] formats, subtitles = [], {}
stream_numbers = collections.defaultdict(int)
mpd_base_url = resolve_base_url(mpd_doc, mpd_base_url or mpd_url)
for period in mpd_doc.findall(_add_ns('Period')): for period in mpd_doc.findall(_add_ns('Period')):
period_duration = parse_duration(period.get('duration')) or mpd_duration period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, { period_ms_info = extract_multisegment_info(period, {
'start_number': 1, 'start_number': 1,
'timescale': 1, 'timescale': 1,
'base_url': mpd_base_url,
}) })
for adaptation_set in period.findall(_add_ns('AdaptationSet')): for adaptation_set in period.findall(_add_ns('AdaptationSet')):
if is_drm_protected(adaptation_set): if is_drm_protected(adaptation_set):
continue continue
adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
for representation in adaptation_set.findall(_add_ns('Representation')): for representation in adaptation_set.findall(_add_ns('Representation')):
if is_drm_protected(representation): if is_drm_protected(representation):
continue continue
@ -2253,27 +2429,35 @@ class InfoExtractor(object):
representation_attrib.update(representation.attrib) representation_attrib.update(representation.attrib)
# According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
mime_type = representation_attrib['mimeType'] mime_type = representation_attrib['mimeType']
content_type = mime_type.split('/')[0] content_type = representation_attrib.get('contentType') or mime_type.split('/')[0]
if content_type == 'text': codec_str = representation_attrib.get('codecs', '')
# TODO implement WebVTT downloading # Some kind of binary subtitle found in some youtube livestreams
pass if mime_type == 'application/x-rawcc':
elif content_type in ('video', 'audio'): codecs = {'scodec': codec_str}
base_url = '' else:
for element in (representation, adaptation_set, period, mpd_doc): codecs = parse_codecs(codec_str)
base_url_e = element.find(_add_ns('BaseURL')) if content_type not in ('video', 'audio', 'text'):
if base_url_e is not None: if mime_type == 'image/jpeg':
base_url = base_url_e.text + base_url content_type = mime_type
if re.match(r'^https?://', base_url): elif codecs.get('vcodec', 'none') != 'none':
break content_type = 'video'
if mpd_base_url and not re.match(r'^https?://', base_url): elif codecs.get('acodec', 'none') != 'none':
if not mpd_base_url.endswith('/') and not base_url.startswith('/'): content_type = 'audio'
mpd_base_url += '/' elif codecs.get('scodec', 'none') != 'none':
base_url = mpd_base_url + base_url content_type = 'text'
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
content_type = 'text'
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
continue
representation_id = representation_attrib.get('id') representation_id = representation_attrib.get('id')
lang = representation_attrib.get('lang') lang = representation_attrib.get('lang')
url_el = representation.find(_add_ns('BaseURL')) url_el = representation.find(_add_ns('BaseURL'))
filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
bandwidth = int_or_none(representation_attrib.get('bandwidth')) bandwidth = int_or_none(representation_attrib.get('bandwidth'))
format_id = join_nonempty(representation_id or content_type, mpd_id)
if content_type in ('video', 'audio'):
f = { f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'manifest_url': mpd_url, 'manifest_url': mpd_url,
@ -2288,8 +2472,27 @@ class InfoExtractor(object):
'filesize': filesize, 'filesize': filesize,
'container': mimetype2ext(mime_type) + '_dash', 'container': mimetype2ext(mime_type) + '_dash',
} }
f.update(parse_codecs(representation_attrib.get('codecs'))) f.update(codecs)
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) elif content_type == 'text':
f = {
'ext': mimetype2ext(mime_type),
'manifest_url': mpd_url,
'filesize': filesize,
}
elif content_type == 'image/jpeg':
# See test case in VikiIE
# https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1
f = {
'format_id': format_id,
'ext': 'mhtml',
'manifest_url': mpd_url,
'format_note': 'DASH storyboards (jpeg)',
'acodec': 'none',
'vcodec': 'none',
}
if is_drm_protected(adaptation_set) or is_drm_protected(representation):
f['has_drm'] = True
representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info)
def prepare_template(template_name, identifiers): def prepare_template(template_name, identifiers):
tmpl = representation_ms_info[template_name] tmpl = representation_ms_info[template_name]
@ -2330,6 +2533,11 @@ class InfoExtractor(object):
def location_key(location): def location_key(location):
return 'url' if re.match(r'^https?://', location) else 'path' return 'url' if re.match(r'^https?://', location) else 'path'
def calc_segment_duration():
return float_or_none(
representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
@ -2341,7 +2549,8 @@ class InfoExtractor(object):
segment_duration = None segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) representation_ms_info['total_number'] = int(math.ceil(
float_or_none(period_duration, segment_duration, default=0)))
representation_ms_info['fragments'] = [{ representation_ms_info['fragments'] = [{
media_location_key: media_template % { media_location_key: media_template % {
'Number': segment_number, 'Number': segment_number,
@ -2381,11 +2590,12 @@ class InfoExtractor(object):
add_segment_url() add_segment_url()
segment_number += 1 segment_number += 1
segment_time += segment_d segment_time += segment_d
elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: elif 'segment_urls' in representation_ms_info:
fragments = []
if 's' in representation_ms_info:
# No media template # No media template
# Example: https://www.youtube.com/watch?v=iXZV5uAYMJI # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
# or any YouTube dashsegments video # or any YouTube dashsegments video
fragments = []
segment_index = 0 segment_index = 0
timescale = representation_ms_info['timescale'] timescale = representation_ms_info['timescale']
for s in representation_ms_info['s']: for s in representation_ms_info['s']:
@ -2397,28 +2607,37 @@ class InfoExtractor(object):
'duration': duration, 'duration': duration,
}) })
segment_index += 1 segment_index += 1
representation_ms_info['fragments'] = fragments elif 'segment_urls_range' in representation_ms_info:
elif 'segment_urls' in representation_ms_info: # Segment URLs with mediaRange
# Example: https://kinescope.io/200615537/master.mpd
# https://github.com/ytdl-org/youtube-dl/issues/30235
# or any mpd generated with Bento4 `mp4dash --no-split --use-segment-list`
segment_duration = calc_segment_duration()
for segment_url, segment_url_range in zip(
representation_ms_info['segment_urls'], representation_ms_info['segment_urls_range']):
fragments.append({
location_key(segment_url): segment_url,
'range': segment_url_range,
'duration': segment_duration,
})
else:
# Segment URLs with no SegmentTimeline # Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091 # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
# https://github.com/ytdl-org/youtube-dl/pull/14844 # https://github.com/ytdl-org/youtube-dl/pull/14844
fragments = [] segment_duration = calc_segment_duration()
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
for segment_url in representation_ms_info['segment_urls']: for segment_url in representation_ms_info['segment_urls']:
fragment = { fragments.append({
location_key(segment_url): segment_url, location_key(segment_url): segment_url,
} 'duration': segment_duration,
if segment_duration: })
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments representation_ms_info['fragments'] = fragments
# If there is a fragments key available then we correctly recognized fragmented media. # If there is a fragments key available then we correctly recognized fragmented media.
# Otherwise we will assume unfragmented media with direct access. Technically, such # Otherwise we will assume unfragmented media with direct access. Technically, such
# assumption is not necessarily correct since we may simply have no support for # assumption is not necessarily correct since we may simply have no support for
# some forms of fragmented media renditions yet, but for now we'll use this fallback. # some forms of fragmented media renditions yet, but for now we'll use this fallback.
if 'fragments' in representation_ms_info: if 'fragments' in representation_ms_info:
base_url = representation_ms_info['base_url']
f.update({ f.update({
# NB: mpd_url may be empty when MPD manifest is parsed from a string # NB: mpd_url may be empty when MPD manifest is parsed from a string
'url': mpd_url or base_url, 'url': mpd_url or base_url,
@ -2426,19 +2645,40 @@ class InfoExtractor(object):
'fragments': [], 'fragments': [],
'protocol': 'http_dash_segments', 'protocol': 'http_dash_segments',
}) })
if 'initialization_url' in representation_ms_info: if 'initialization_url' in representation_ms_info and 'initialization_url_range' in representation_ms_info:
# Initialization URL with range (accompanied by Segment URLs with mediaRange above)
# https://github.com/ytdl-org/youtube-dl/issues/30235
initialization_url = representation_ms_info['initialization_url']
f['fragments'].append({
location_key(initialization_url): initialization_url,
'range': representation_ms_info['initialization_url_range'],
})
elif 'initialization_url' in representation_ms_info:
initialization_url = representation_ms_info['initialization_url'] initialization_url = representation_ms_info['initialization_url']
if not f.get('url'): if not f.get('url'):
f['url'] = initialization_url f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url}) f['fragments'].append({location_key(initialization_url): initialization_url})
elif 'initialization_url_range' in representation_ms_info:
# no Initialization URL but range (accompanied by no Segment URLs but mediaRange above)
# https://github.com/ytdl-org/youtube-dl/issues/27575
f['fragments'].append({
location_key(base_url): base_url,
'range': representation_ms_info['initialization_url_range'],
})
f['fragments'].extend(representation_ms_info['fragments']) f['fragments'].extend(representation_ms_info['fragments'])
if not period_duration:
period_duration = sum(traverse_obj(representation_ms_info, (
'fragments', Ellipsis, 'duration', T(float_or_none))))
else: else:
# Assuming direct URL to unfragmented media. # Assuming direct URL to unfragmented media.
f['url'] = base_url f['url'] = representation_ms_info['base_url']
if content_type in ('video', 'audio', 'image/jpeg'):
f['manifest_stream_number'] = stream_numbers[f['url']]
stream_numbers[f['url']] += 1
formats.append(f) formats.append(f)
else: elif content_type == 'text':
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) subtitles.setdefault(lang or 'und', []).append(f)
return formats return formats, subtitles
def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle( res = self._download_xml_handle(
@ -2785,25 +3025,21 @@ class InfoExtractor(object):
return formats return formats
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search( return self._search_json(
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
webpage) webpage, 'JWPlayer data', video_id,
if mobj: # must be a {...} or sequence, ending
try: contains_pattern=r'\{[\s\S]*}(?(load)(?:\s*,\s*\{[\s\S]*})*)', end_pattern=r'(?(load)\]|\))',
jwplayer_data = self._parse_json(mobj.group('options'), transform_source=transform_source, default=None)
video_id=video_id,
transform_source=transform_source)
except ExtractorError:
pass
else:
if isinstance(jwplayer_data, dict):
return jwplayer_data
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
jwplayer_data = self._find_jwplayer_data( # allow passing `transform_source` through to _find_jwplayer_data()
webpage, video_id, transform_source=js_to_json) transform_source = kwargs.pop('transform_source', None)
return self._parse_jwplayer_data( kwfind = compat_kwargs({'transform_source': transform_source}) if transform_source else {}
jwplayer_data, video_id, *args, **kwargs)
jwplayer_data = self._find_jwplayer_data(webpage, video_id, **kwfind)
return self._parse_jwplayer_data(jwplayer_data, video_id, *args, **kwargs)
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
@ -2837,16 +3073,8 @@ class InfoExtractor(object):
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
subtitles = {} subtitles = {}
tracks = video_data.get('tracks') for track in traverse_obj(video_data, (
if tracks and isinstance(tracks, list): 'tracks', lambda _, t: t.get('kind').lower() in ('captions', 'subtitles'))):
for track in tracks:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
if not track_kind or not isinstance(track_kind, compat_str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
track_url = urljoin(base_url, track.get('file')) track_url = urljoin(base_url, track.get('file'))
if not track_url: if not track_url:
continue continue
@ -3071,12 +3299,16 @@ class InfoExtractor(object):
return ret return ret
@classmethod @classmethod
def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2): def _merge_subtitles(cls, subtitle_dict1, *subtitle_dicts, **kwargs):
""" Merge two subtitle dictionaries, language by language. """ """ Merge subtitle dictionaries, language by language. """
ret = dict(subtitle_dict1)
for lang in subtitle_dict2: # ..., * , target=None
ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang]) target = kwargs.get('target') or dict(subtitle_dict1)
return ret
for subtitle_dict in subtitle_dicts:
for lang in subtitle_dict:
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subtitle_dict[lang])
return target
def extract_automatic_captions(self, *args, **kwargs): def extract_automatic_captions(self, *args, **kwargs):
if (self._downloader.params.get('writeautomaticsub', False) if (self._downloader.params.get('writeautomaticsub', False)
@ -3109,6 +3341,29 @@ class InfoExtractor(object):
def _generic_title(self, url): def _generic_title(self, url):
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
def _yes_playlist(self, playlist_id, video_id, *args, **kwargs):
# smuggled_data=None, *, playlist_label='playlist', video_label='video'
smuggled_data = args[0] if len(args) == 1 else kwargs.get('smuggled_data')
playlist_label = kwargs.get('playlist_label', 'playlist')
video_label = kwargs.get('video_label', 'video')
if not playlist_id or not video_id:
return not video_id
no_playlist = (smuggled_data or {}).get('force_noplaylist')
if no_playlist is not None:
return not no_playlist
video_id = '' if video_id is True else ' ' + video_id
noplaylist = self.get_param('noplaylist')
self.to_screen(
'Downloading just the {0}{1} because of --no-playlist'.format(video_label, video_id)
if noplaylist else
'Downloading {0}{1} - add --no-playlist to download just the {2}{3}'.format(
playlist_label, '' if playlist_id is True else ' ' + playlist_id,
video_label, video_id))
return not noplaylist
class SearchInfoExtractor(InfoExtractor): class SearchInfoExtractor(InfoExtractor):
""" """

View File

@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
float_or_none,
T,
traverse_obj,
txt_or_none,
unified_timestamp,
url_or_none,
)
class EpidemicSoundIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
'info_dict': {
'id': '45014',
'display_id': 'yFfQVRpSPz',
'ext': 'mp3',
'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
'title': 'Door Knock Door 1',
'duration': 1,
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
'timestamp': 1415320353,
'upload_date': '20141107',
'age_limit': None,
# check that the "best" format was found, since test file MD5 doesn't
# distinguish the formats
'format': 'full',
},
}, {
'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
'md5': 'c82b745890f9baf18dc2f8d568ee3830',
'info_dict': {
'id': '148700',
'display_id': 'mj8GTTwsZd',
'ext': 'mp3',
'tags': ['liquid drum n bass', 'energetic'],
'title': 'Noplace',
'duration': 237,
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
'timestamp': 1694426482,
'release_timestamp': 1700535606,
'upload_date': '20230911',
'age_limit': None,
'format': 'full',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
json_data = self._download_json('https://www.epidemicsound.com/json/track/' + video_id, video_id)
def fmt_or_none(f):
if not f.get('format'):
f['format'] = f.get('format_id')
elif not f.get('format_id'):
f['format_id'] = f['format']
if not (f['url'] and f['format']):
return
if f.get('format_note'):
f['format_note'] = 'track ID ' + f['format_note']
f['preference'] = -1 if f['format'] == 'full' else -2
return f
formats = traverse_obj(json_data, (
'stems', T(dict.items), Ellipsis, {
'format': (0, T(txt_or_none)),
'format_note': (1, 's3TrackId', T(txt_or_none)),
'format_id': (1, 'stemType', T(txt_or_none)),
'url': (1, 'lqMp3Url', T(url_or_none)),
}, T(fmt_or_none)))
self._sort_formats(formats)
info = traverse_obj(json_data, {
'id': ('id', T(txt_or_none)),
'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
'title': ('title', T(txt_or_none)),
'duration': ('length', T(float_or_none)),
'timestamp': ('added', T(unified_timestamp)),
'thumbnail': (('imageUrl', 'cover'), T(url_or_none)),
'age_limit': ('isExplicit', T(lambda b: 18 if b else None)),
'release_timestamp': ('releaseDate', T(unified_timestamp)),
}, get_all=False)
info.update(traverse_obj(json_data, {
'categories': ('genres', Ellipsis, 'tag', T(txt_or_none)),
'tags': ('metadataTags', Ellipsis, T(txt_or_none)),
}))
info.update({
'display_id': video_id,
'formats': formats,
})
return info

View File

@ -159,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE from .byutv import BYUtvIE
from .c56 import C56IE from .c56 import C56IE
from .caffeine import CaffeineTVIE
from .callin import CallinIE from .callin import CallinIE
from .camdemy import ( from .camdemy import (
CamdemyIE, CamdemyIE,
@ -357,6 +358,7 @@ from .ellentube import (
from .elpais import ElPaisIE from .elpais import ElPaisIE
from .embedly import EmbedlyIE from .embedly import EmbedlyIE
from .engadget import EngadgetIE from .engadget import EngadgetIE
from .epidemicsound import EpidemicSoundIE
from .eporner import EpornerIE from .eporner import EpornerIE
from .eroprofile import EroProfileIE from .eroprofile import EroProfileIE
from .escapist import EscapistIE from .escapist import EscapistIE
@ -381,7 +383,6 @@ from .fc2 import (
FC2EmbedIE, FC2EmbedIE,
) )
from .fczenit import FczenitIE from .fczenit import FczenitIE
from .filemoon import FileMoonIE
from .fifa import FifaIE from .fifa import FifaIE
from .filmon import ( from .filmon import (
FilmOnIE, FilmOnIE,
@ -442,6 +443,7 @@ from .gamespot import GameSpotIE
from .gamestar import GameStarIE from .gamestar import GameStarIE
from .gaskrank import GaskrankIE from .gaskrank import GaskrankIE
from .gazeta import GazetaIE from .gazeta import GazetaIE
from .gbnews import GBNewsIE
from .gdcvault import GDCVaultIE from .gdcvault import GDCVaultIE
from .gedidigital import GediDigitalIE from .gedidigital import GediDigitalIE
from .generic import GenericIE from .generic import GenericIE
@ -896,21 +898,13 @@ from .ooyala import (
) )
from .ora import OraTVIE from .ora import OraTVIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFONIE,
ORFFM4IE, ORFONLiveIE,
ORFFM4StoryIE, ORFFM4StoryIE,
ORFOE1IE,
ORFOE3IE,
ORFNOEIE,
ORFWIEIE,
ORFBGLIE,
ORFOOEIE,
ORFSTMIE,
ORFKTNIE,
ORFSBGIE,
ORFTIRIE,
ORFVBGIE,
ORFIPTVIE, ORFIPTVIE,
ORFPodcastIE,
ORFRadioIE,
ORFRadioCollectionIE,
) )
from .outsidetv import OutsideTVIE from .outsidetv import OutsideTVIE
from .packtpub import ( from .packtpub import (
@ -1653,7 +1647,15 @@ from .younow import (
YouNowChannelIE, YouNowChannelIE,
YouNowMomentIE, YouNowMomentIE,
) )
from .youporn import YouPornIE from .youporn import (
YouPornIE,
YouPornCategoryIE,
YouPornChannelIE,
YouPornCollectionIE,
YouPornStarIE,
YouPornTagIE,
YouPornVideosIE,
)
from .yourporn import YourPornIE from .yourporn import YourPornIE
from .yourupload import YourUploadIE from .yourupload import YourUploadIE
from .youtube import ( from .youtube import (

View File

@ -1,43 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
js_to_json,
)
class FileMoonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
_TEST = {
'url': 'https://filemoon.sx/e/dw40rxrzruqz',
'md5': '5a713742f57ac4aef29b74733e8dda01',
'info_dict': {
'id': 'dw40rxrzruqz',
'title': 'dw40rxrzruqz',
'ext': 'mp4'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
packed = matches[-1]
unpacked = decode_packed_codes(packed)
jwplayer_sources = self._parse_json(
self._search_regex(
r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
video_id, transform_source=js_to_json)
formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
return {
'id': video_id,
'title': self._generic_title(url) or video_id,
'formats': formats
}

View File

@ -0,0 +1,139 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
extract_attributes,
ExtractorError,
T,
traverse_obj,
txt_or_none,
url_or_none,
)
class GBNewsIE(InfoExtractor):
IE_DESC = 'GB News clips, features and live stream'
# \w+ is normally shows or news, but apparently any word redirects to the correct URL
_VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)'
_PLATFORM = 'safari'
_SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php'
_TESTS = [{
'url': 'https://www.gbnews.uk/shows/andrew-neils-message-to-companies-choosing-to-boycott-gb-news/106889',
'info_dict': {
'id': '106889',
'ext': 'mp4',
'title': "Andrew Neil's message to companies choosing to boycott GB News",
'description': 'md5:b281f5d22fd6d5eda64a4e3ba771b351',
},
'skip': '404 not found',
}, {
'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row',
'info_dict': {
'id': '52264136',
'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row',
'ext': 'mp4',
'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism',
'description': 'The post was criticised by former employers of the broadcaster',
},
}, {
'url': 'https://www.gbnews.uk/watchlive',
'info_dict': {
'id': '1069',
'display_id': 'watchlive',
'ext': 'mp4',
'title': 'GB News Live',
'is_live': True,
},
'params': {
'skip_download': 'm3u8',
},
}]
def _real_extract(self, url):
display_id = self._match_id(url).split('/')[-1]
webpage = self._download_webpage(url, display_id)
# extraction based on https://github.com/ytdl-org/youtube-dl/issues/29341
'''
<div id="video-106908"
class="simplestream"
data-id="GB001"
data-type="vod"
data-key="3Li3Nt2Qs8Ct3Xq9Fi5Uy0Mb2Bj0Qs"
data-token="f9c317c727dc07f515b20036c8ef14a6"
data-expiry="1624300052"
data-uvid="37900558"
data-poster="https://thumbnails.simplestreamcdn.com/gbnews/ondemand/37900558.jpg?width=700&"
data-npaw="false"
data-env="production">
'''
# exception if no match
video_data = self._search_regex(
r'(<div\s[^>]*\bclass\s*=\s*(\'|")(?!.*sidebar\b)simplestream(?:\s[\s\w$-]*)?\2[^>]*>)',
webpage, 'video data')
video_data = extract_attributes(video_data)
ss_id = video_data.get('data-id')
if not ss_id:
raise ExtractorError('Simplestream ID not found')
json_data = self._download_json(
self._SSMP_URL, display_id,
note='Downloading Simplestream JSON metadata',
errnote='Unable to download Simplestream JSON metadata',
query={
'id': ss_id,
'env': video_data.get('data-env', 'production'),
}, fatal=False)
meta_url = traverse_obj(json_data, ('response', 'api_hostname'))
if not meta_url:
raise ExtractorError('No API host found')
uvid = video_data['data-uvid']
dtype = video_data.get('data-type')
stream_data = self._download_json(
'%s/api/%s/stream/%s' % (meta_url, 'show' if dtype == 'vod' else dtype, uvid),
uvid,
query={
'key': video_data.get('data-key'),
'platform': self._PLATFORM,
},
headers={
'Token': video_data.get('data-token'),
'Token-Expiry': video_data.get('data-expiry'),
'Uvid': uvid,
}, fatal=False)
stream_url = traverse_obj(stream_data, (
'response', 'stream', T(url_or_none)))
if not stream_url:
raise ExtractorError('No stream data/URL')
# now known to be a dict
stream_data = stream_data['response']
drm = stream_data.get('drm')
if drm:
self.report_drm(uvid)
formats = self._extract_m3u8_formats(
stream_url, uvid, ext='mp4', entry_protocol='m3u8_native',
fatal=False)
# exception if no formats
self._sort_formats(formats)
return {
'id': uvid,
'display_id': display_id,
'title': (traverse_obj(stream_data, ('title', T(txt_or_none)))
or self._og_search_title(webpage, default=None)
or display_id.replace('-', ' ').capitalize()),
'description': self._og_search_description(webpage, default=None),
'thumbnail': (traverse_obj(video_data, ('data-poster', T(url_or_none)))
or self._og_search_thumbnail(webpage)),
'formats': formats,
'is_live': (dtype == 'live') or None,
}

View File

@ -1,101 +1,267 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts,
mimetype2ext, mimetype2ext,
ExtractorError, parse_iso8601,
T,
traverse_obj,
txt_or_none,
url_or_none,
) )
class ImgurIE(InfoExtractor): class ImgurBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)' # hard-coded value, as also used by ArchiveTeam
_CLIENT_ID = '546c25a59c58ad7'
@classmethod
def _imgur_result(cls, item_id):
return cls.url_result('imgur:%s' % item_id, ImgurIE.ie_key(), item_id)
def _call_api(self, endpoint, video_id, **kwargs):
return self._download_json(
'https://api.imgur.com/post/v1/%s/%s?client_id=%s&include=media,account' % (endpoint, video_id, self._CLIENT_ID),
video_id, **kwargs)
@staticmethod
def get_description(s):
if 'Discover the magic of the internet at Imgur' in s:
return None
return txt_or_none(s)
class ImgurIE(ImgurBaseIE):
_VALID_URL = r'''(?x)
(?:
https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)|
imgur:
)(?P<id>[a-zA-Z0-9]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv', 'url': 'https://imgur.com/A61SaA1',
'info_dict': { 'info_dict': {
'id': 'A61SaA1', 'id': 'A61SaA1',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
'timestamp': 1416446068,
'upload_date': '20141120',
}, },
}, { }, {
'url': 'https://imgur.com/A61SaA1', 'url': 'https://i.imgur.com/A61SaA1.gifv',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://i.imgur.com/crGpqCV.mp4', 'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True, 'only_matching': True,
}, { }, {
# no title # previously, no title
'url': 'https://i.imgur.com/jxBXAMC.gifv', 'url': 'https://i.imgur.com/jxBXAMC.gifv',
'only_matching': True, 'info_dict': {
'id': 'jxBXAMC',
'ext': 'mp4',
'title': 'Fahaka puffer feeding',
'timestamp': 1533835503,
'upload_date': '20180809',
},
}]
def _extract_twitter_formats(self, html, tw_id='twitter', **kwargs):
fatal = kwargs.pop('fatal', False)
tw_stream = self._html_search_meta('twitter:player:stream', html, fatal=fatal, **kwargs)
if not tw_stream:
return []
ext = mimetype2ext(self._html_search_meta(
'twitter:player:stream:content_type', html, default=None))
width, height = (int_or_none(self._html_search_meta('twitter:player:' + v, html, default=None))
for v in ('width', 'height'))
return [{
'format_id': tw_id,
'url': tw_stream,
'ext': ext or determine_ext(tw_stream),
'width': width,
'height': height,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
data = self._call_api('media', video_id, fatal=False, expected_status=404)
webpage = self._download_webpage( webpage = self._download_webpage(
'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id) 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id, fatal=not data) or ''
width = int_or_none(self._og_search_property( if not traverse_obj(data, ('media', 0, (
'video:width', webpage, default=None)) ('type', T(lambda t: t == 'video' or None)),
height = int_or_none(self._og_search_property( ('metadata', 'is_animated'))), get_all=False):
'video:height', webpage, default=None)) raise ExtractorError(
'%s is not a video or animated image' % video_id,
expected=True)
media_fmt = traverse_obj(data, ('media', 0, {
'url': ('url', T(url_or_none)),
'ext': 'ext',
'width': ('width', T(int_or_none)),
'height': ('height', T(int_or_none)),
'filesize': ('size', T(int_or_none)),
'acodec': ('metadata', 'has_sound', T(lambda b: None if b else 'none')),
}))
media_url = traverse_obj(media_fmt, 'url')
if media_url:
if not media_fmt.get('ext'):
media_fmt['ext'] = mimetype2ext(traverse_obj(
data, ('media', 0, 'mime_type'))) or determine_ext(media_url)
if traverse_obj(data, ('media', 0, 'type')) == 'image':
media_fmt['acodec'] = 'none'
media_fmt.setdefault('preference', -10)
tw_formats = self._extract_twitter_formats(webpage)
if traverse_obj(tw_formats, (0, 'url')) == media_url:
tw_formats = []
else:
# maybe this isn't an animated image/video?
self._check_formats(tw_formats, video_id)
video_elements = self._search_regex( video_elements = self._search_regex(
r'(?s)<div class="video-elements">(.*?)</div>', r'(?s)<div class="video-elements">(.*?)</div>',
webpage, 'video elements', default=None) webpage, 'video elements', default=None)
if not video_elements: if not (video_elements or tw_formats or media_url):
raise ExtractorError( raise ExtractorError(
'No sources found for video %s. Maybe an image?' % video_id, 'No sources found for video %s. Maybe a plain image?' % video_id,
expected=True) expected=True)
formats = [] def mung_format(fmt, *extra):
for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): fmt.update({
formats.append({
'format_id': m.group('type').partition('/')[2],
'url': self._proto_relative_url(m.group('src')),
'ext': mimetype2ext(m.group('type')),
'width': width,
'height': height,
'http_headers': { 'http_headers': {
'User-Agent': 'youtube-dl (like wget)', 'User-Agent': 'youtube-dl (like wget)',
}, },
}) })
for d in extra:
fmt.update(d)
return fmt
if video_elements:
def og_get_size(media_type):
return dict((p, int_or_none(self._og_search_property(
':'.join((media_type, p)), webpage, default=None)))
for p in ('width', 'height'))
size = og_get_size('video')
if all(v is None for v in size.values()):
size = og_get_size('image')
formats = traverse_obj(
re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements),
(Ellipsis, {
'format_id': ('type', T(lambda s: s.partition('/')[2])),
'url': ('src', T(self._proto_relative_url)),
'ext': ('type', T(mimetype2ext)),
}, T(lambda f: mung_format(f, size))))
gif_json = self._search_regex( gif_json = self._search_regex(
r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
webpage, 'GIF code', fatal=False) webpage, 'GIF code', fatal=False)
if gif_json: MUST_BRANCH = (None, T(lambda _: None))
gifd = self._parse_json( formats.extend(traverse_obj(gif_json, (
gif_json, video_id, transform_source=js_to_json) T(lambda j: self._parse_json(
formats.append({ j, video_id, transform_source=js_to_json, fatal=False)), {
'url': ('gifUrl', T(self._proto_relative_url)),
'filesize': ('size', T(int_or_none)),
}, T(lambda f: mung_format(f, size, {
'format_id': 'gif', 'format_id': 'gif',
'preference': -10, 'preference': -10, # gifs are worse than videos
'width': width,
'height': height,
'ext': 'gif', 'ext': 'gif',
'acodec': 'none', 'acodec': 'none',
'vcodec': 'gif', 'vcodec': 'gif',
'container': 'gif', 'container': 'gif',
'url': self._proto_relative_url(gifd['gifUrl']), })), MUST_BRANCH)))
'filesize': gifd.get('size'), else:
'http_headers': { formats = []
'User-Agent': 'youtube-dl (like wget)',
}, # maybe add formats from JSON or page Twitter metadata
}) if not any((u == media_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
formats.append(mung_format(media_fmt))
tw_url = traverse_obj(tw_formats, (0, 'url'))
if not any((u == tw_url) for u in traverse_obj(formats, (Ellipsis, 'url'))):
formats.extend(mung_format(f) for f in tw_formats)
self._sort_formats(formats) self._sort_formats(formats)
return { return merge_dicts(traverse_obj(data, {
'uploader_id': ('account_id', T(txt_or_none),
T(lambda a: a if int_or_none(a) != 0 else None)),
'uploader': ('account', 'username', T(txt_or_none)),
'uploader_url': ('account', 'avatar_url', T(url_or_none)),
'like_count': ('upvote_count', T(int_or_none)),
'dislike_count': ('downvote_count', T(int_or_none)),
'comment_count': ('comment_count', T(int_or_none)),
'age_limit': ('is_mature', T(lambda x: 18 if x else None)),
'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
'release_timestamp': ('created_at', T(parse_iso8601)),
}, get_all=False), traverse_obj(data, ('media', 0, 'metadata', {
'title': ('title', T(txt_or_none)),
'description': ('description', T(self.get_description)),
'duration': ('duration', T(float_or_none)),
'timestamp': (('updated_at', 'created_at'), T(parse_iso8601)),
'release_timestamp': ('created_at', T(parse_iso8601)),
})), {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': self._og_search_title(webpage, default=video_id), 'title': self._og_search_title(webpage, default='Imgur video ' + video_id),
} 'description': self.get_description(self._og_search_description(webpage)),
'thumbnail': url_or_none(self._html_search_meta('thumbnailUrl', webpage, default=None)),
})
class ImgurGalleryIE(InfoExtractor): class ImgurGalleryBaseIE(ImgurBaseIE):
_GALLERY = True
def _real_extract(self, url):
gallery_id = self._match_id(url)
data = self._call_api('albums', gallery_id, fatal=False, expected_status=404)
info = traverse_obj(data, {
'title': ('title', T(txt_or_none)),
'description': ('description', T(self.get_description)),
})
if traverse_obj(data, 'is_album'):
def yield_media_ids():
for m_id in traverse_obj(data, (
'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'],
'id', T(txt_or_none))):
yield m_id
# if a gallery with exactly one video, apply album metadata to video
media_id = (
self._GALLERY
and traverse_obj(data, ('image_count', T(lambda c: c == 1)))
and next(yield_media_ids(), None))
if not media_id:
result = self.playlist_result(
map(self._imgur_result, yield_media_ids()), gallery_id)
result.update(info)
return result
gallery_id = media_id
result = self._imgur_result(gallery_id)
info['_type'] = 'url_transparent'
result.update(info)
return result
class ImgurGalleryIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:gallery' IE_NAME = 'imgur:gallery'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
@ -106,49 +272,93 @@ class ImgurGalleryIE(InfoExtractor):
'title': 'Adding faces make every GIF better', 'title': 'Adding faces make every GIF better',
}, },
'playlist_count': 25, 'playlist_count': 25,
'skip': 'Zoinks! You\'ve taken a wrong turn.',
}, { }, {
# TODO: static images - replace with animated/video gallery
'url': 'http://imgur.com/topic/Aww/ll5Vk', 'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://imgur.com/gallery/YcAQlkx', 'url': 'https://imgur.com/gallery/YcAQlkx',
'add_ies': ['Imgur'],
'info_dict': { 'info_dict': {
'id': 'YcAQlkx', 'id': 'YcAQlkx',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
} 'timestamp': 1358554297,
'upload_date': '20130119',
'uploader_id': '1648642',
'uploader': 'wittyusernamehere',
},
}, { }, {
# TODO: static image - replace with animated/video gallery
'url': 'http://imgur.com/topic/Funny/N8rOudd', 'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://imgur.com/r/aww/VQcQPhM', 'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True, 'add_ies': ['Imgur'],
'info_dict': {
'id': 'VQcQPhM',
'ext': 'mp4',
'title': 'The boss is here',
'timestamp': 1476494751,
'upload_date': '20161015',
'uploader_id': '19138530',
'uploader': 'thematrixcam',
},
},
# from PR #16674
{
'url': 'https://imgur.com/t/unmuted/6lAn9VQ',
'info_dict': {
'id': '6lAn9VQ',
'title': 'Penguins !',
},
'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/kx2uD3C',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'ZVMv45i',
'ext': 'mp4',
'title': 'Intruder',
'timestamp': 1528129683,
'upload_date': '20180604',
},
}, {
'url': 'https://imgur.com/t/unmuted/wXSK0YH',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'JCAP4io',
'ext': 'mp4',
'title': 're:I got the blues$',
'description': 'Lukas vocal stylings.\n\nFP edit: dont encourage me. Ill never stop posting Luka and friends.',
'timestamp': 1527809525,
'upload_date': '20180531',
},
}] }]
def _real_extract(self, url):
gallery_id = self._match_id(url)
data = self._download_json( class ImgurAlbumIE(ImgurGalleryBaseIE):
'https://imgur.com/gallery/%s.json' % gallery_id,
gallery_id)['data']['image']
if data.get('is_album'):
entries = [
self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
for image in data['album_images']['images'] if image.get('hash')]
return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
class ImgurAlbumIE(ImgurGalleryIE):
IE_NAME = 'imgur:album' IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
_GALLERY = False
_TESTS = [{ _TESTS = [{
# TODO: only static images - replace with animated/video gallery
'url': 'http://imgur.com/a/j6Orj', 'url': 'http://imgur.com/a/j6Orj',
'info_dict': { 'only_matching': True,
'id': 'j6Orj',
'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
}, },
'playlist_count': 12, # from PR #21693
{
'url': 'https://imgur.com/a/iX265HX',
'info_dict': {
'id': 'iX265HX',
'title': 'enen-no-shouboutai'
},
'playlist_count': 2,
}, {
'url': 'https://imgur.com/a/8pih2Ed',
'info_dict': {
'id': '8pih2Ed'
},
'playlist_mincount': 1,
}] }]

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools import itertools
@ -10,7 +11,7 @@ from ..compat import (
compat_ord, compat_ord,
compat_str, compat_str,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_zip compat_zip as zip,
) )
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
@ -24,7 +25,7 @@ class MixcloudBaseIE(InfoExtractor):
def _call_api(self, object_type, object_fields, display_id, username, slug=None): def _call_api(self, object_type, object_fields, display_id, username, slug=None):
lookup_key = object_type + 'Lookup' lookup_key = object_type + 'Lookup'
return self._download_json( return self._download_json(
'https://www.mixcloud.com/graphql', display_id, query={ 'https://app.mixcloud.com/graphql', display_id, query={
'query': '''{ 'query': '''{
%s(lookup: {username: "%s"%s}) { %s(lookup: {username: "%s"%s}) {
%s %s
@ -44,7 +45,7 @@ class MixcloudIE(MixcloudBaseIE):
'ext': 'm4a', 'ext': 'm4a',
'title': 'Cryptkeeper', 'title': 'Cryptkeeper',
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.', 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
'uploader': 'Daniel Holbach', 'uploader': 'dholbach', # was: 'Daniel Holbach',
'uploader_id': 'dholbach', 'uploader_id': 'dholbach',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'view_count': int, 'view_count': int,
@ -57,7 +58,7 @@ class MixcloudIE(MixcloudBaseIE):
'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat', 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Caribou 7 inch Vinyl Mix & Chat', 'title': 'Caribou 7 inch Vinyl Mix & Chat',
'description': 'md5:2b8aec6adce69f9d41724647c65875e8', 'description': r're:Last week Dan Snaith aka Caribou swung by the Brownswood.{136}',
'uploader': 'Gilles Peterson Worldwide', 'uploader': 'Gilles Peterson Worldwide',
'uploader_id': 'gillespeterson', 'uploader_id': 'gillespeterson',
'thumbnail': 're:https?://.*', 'thumbnail': 're:https?://.*',
@ -65,6 +66,23 @@ class MixcloudIE(MixcloudBaseIE):
'timestamp': 1422987057, 'timestamp': 1422987057,
'upload_date': '20150203', 'upload_date': '20150203',
}, },
'params': {
'skip_download': '404 not found',
},
}, {
'url': 'https://www.mixcloud.com/gillespeterson/carnival-m%C3%BAsica-popular-brasileira-mix/',
'info_dict': {
'id': 'gillespeterson_carnival-música-popular-brasileira-mix',
'ext': 'm4a',
'title': 'Carnival Música Popular Brasileira Mix',
'description': r're:Gilles was recently in Brazil to play at Boiler Room.{208}',
'timestamp': 1454347174,
'upload_date': '20160201',
'uploader': 'Gilles Peterson Worldwide',
'uploader_id': 'gillespeterson',
'thumbnail': 're:https?://.*',
'view_count': int,
},
}, { }, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/', 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True, 'only_matching': True,
@ -76,10 +94,10 @@ class MixcloudIE(MixcloudBaseIE):
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR.""" """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
return ''.join([ return ''.join([
compat_chr(compat_ord(ch) ^ compat_ord(k)) compat_chr(compat_ord(ch) ^ compat_ord(k))
for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) for ch, k in zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url): def _real_extract(self, url):
username, slug = re.match(self._VALID_URL, url).groups() username, slug = self._match_valid_url(url).groups()
username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug) username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
track_id = '%s_%s' % (username, slug) track_id = '%s_%s' % (username, slug)

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
str_or_none, str_or_none,
try_get, traverse_obj,
) )
@ -109,7 +109,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
} }
name''' name'''
@ classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url) return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
@ -118,7 +118,8 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE):
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist'] artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
def entries(): def entries():
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []): for music in traverse_obj(artist, (
'musics', 'nodes', lambda _, m: m['musicID'])):
yield self._parse_music(music) yield self._parse_music(music)
return self.playlist_result( return self.playlist_result(
@ -137,7 +138,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE):
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande', 'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
'description': 'md5:7043342c09a224598e93546e98e49282', 'description': 'md5:7043342c09a224598e93546e98e49282',
'upload_date': '20161107', 'upload_date': '20161107',
'uploader_id': 'maiaramaraisaoficial', 'uploader_id': '@maiaramaraisaoficial',
'uploader': 'Maiara e Maraisa', 'uploader': 'Maiara e Maraisa',
} }
}] }]

View File

@ -3,17 +3,23 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
url_or_none,
)
class TelewebionIE(InfoExtractor): class TelewebionIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?telewebion\.com/(episode|clip)/(?P<id>[a-zA-Z0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.telewebion.com/#!/episode/1263668/', 'url': 'http://www.telewebion.com/episode/0x1b3139c/',
'info_dict': { 'info_dict': {
'id': '1263668', 'id': '0x1b3139c',
'ext': 'mp4', 'ext': 'mp4',
'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا', 'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://static\.telewebion\.com/episodeImages/.*/default',
'view_count': int, 'view_count': int,
}, },
'params': { 'params': {
@ -25,31 +31,24 @@ class TelewebionIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
secure_token = self._download_webpage( episode_details = self._download_json('https://gateway.telewebion.ir/kandoo/episode/getEpisodeDetail/?EpisodeId={0}'.format(video_id), video_id)
'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id) episode_details = episode_details['body']['queryEpisode'][0]
episode_details = self._download_json(
'http://m.s2.telewebion.com/op/op', video_id,
query={'action': 'getEpisodeDetails', 'episode_id': video_id})
m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % ( channel_id = episode_details['channel']['descriptor']
video_id, episode_details['file_path'], secure_token) episode_image_id = episode_details.get('image')
episode_image = 'https://static.telewebion.com/episodeImages/{0}/default'.format(episode_image_id) if episode_image_id else None
m3u8_url = 'https://cdna.telewebion.com/{0}/episode/{1}/playlist.m3u8'.format(channel_id, video_id)
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', m3u8_id='hls') m3u8_url, video_id, ext='mp4', m3u8_id='hls',
entry_protocol='m3u8_native')
picture_paths = [ self._sort_formats(formats)
episode_details.get('picture_path'),
episode_details.get('large_picture_path'),
]
thumbnails = [{
'url': picture_path,
'preference': idx,
} for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
return { return {
'id': video_id, 'id': video_id,
'title': episode_details['title'], 'title': episode_details['title'],
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnail': url_or_none(episode_image),
'view_count': episode_details.get('view_count'), 'view_count': int_or_none(episode_details.get('view_count')),
'duration': float_or_none(episode_details.get('duration')),
} }

View File

@ -2,9 +2,22 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..compat import compat_kwargs
from ..utils import (
base_url,
determine_ext,
ExtractorError,
float_or_none,
merge_dicts,
T,
traverse_obj,
txt_or_none,
url_basename,
url_or_none,
)
class Vbox7IE(InfoExtractor): class Vbox7IE(InfoExtractor):
@ -20,23 +33,27 @@ class Vbox7IE(InfoExtractor):
) )
(?P<id>[\da-fA-F]+) (?P<id>[\da-fA-F]+)
''' '''
_EMBED_REGEX = [r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)']
_GEO_COUNTRIES = ['BG'] _GEO_COUNTRIES = ['BG']
_TESTS = [{ _TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c', # the http: URL just redirects here
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf', 'url': 'https://vbox7.com/play:0946fff23c',
'md5': '50ca1f78345a9c15391af47d8062d074',
'info_dict': { 'info_dict': {
'id': '0946fff23c', 'id': '0946fff23c',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Борисов: Притеснен съм за бъдещето на България', 'title': 'Борисов: Притеснен съм за бъдещето на България',
'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"', 'description': 'По думите му е опасно страната ни да бъде обявена за "сигурна"',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1470982814, 'timestamp': 1470982814,
'upload_date': '20160812', 'upload_date': '20160812',
'uploader': 'zdraveibulgaria', 'uploader': 'zdraveibulgaria',
'thumbnail': r're:^https?://.*\.jpg$',
'view_count': int,
'duration': 2640,
}, },
'params': { 'expected_warnings': [
'proxy': '127.0.0.1:8118', 'Unable to download webpage',
}, ],
}, { }, {
'url': 'http://vbox7.com/play:249bb972c2', 'url': 'http://vbox7.com/play:249bb972c2',
'md5': '99f65c0c9ef9b682b97313e052734c3f', 'md5': '99f65c0c9ef9b682b97313e052734c3f',
@ -44,8 +61,15 @@ class Vbox7IE(InfoExtractor):
'id': '249bb972c2', 'id': '249bb972c2',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Смях! Чудо - чист за секунди - Скрита камера', 'title': 'Смях! Чудо - чист за секунди - Скрита камера',
'description': 'Смях! Чудо - чист за секунди - Скрита камера',
'timestamp': 1360215023,
'upload_date': '20130207',
'uploader': 'svideteliat_ot_varshava',
'thumbnail': 'https://i49.vbox7.com/o/249/249bb972c20.jpg',
'view_count': int,
'duration': 83,
}, },
'skip': 'georestricted', 'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1', 'url': 'http://vbox7.com/emb/external.php?vid=a240d20f9c&autoplay=1',
'only_matching': True, 'only_matching': True,
@ -54,52 +78,127 @@ class Vbox7IE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod @classmethod
def _extract_url(webpage): def _extract_url(cls, webpage):
mobj = re.search( mobj = re.search(cls._EMBED_REGEX[0], webpage)
r'<iframe[^>]+src=(?P<q>["\'])(?P<url>(?:https?:)?//vbox7\.com/emb/external\.php.+?)(?P=q)',
webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
# specialisation to transform what looks like ld+json that
# may contain invalid character combinations
# transform_source=None, fatal=True
def _parse_json(self, json_string, video_id, *args, **kwargs):
if '"@context"' in json_string[:30]:
# this is ld+json, or that's the way to bet
transform_source = args[0] if len(args) > 0 else kwargs.get('transform_source')
if not transform_source:
def fix_chars(src):
# fix malformed ld+json: replace raw CRLFs with escaped LFs
return re.sub(
r'"[^"]+"', lambda m: re.sub(r'\r?\n', r'\\n', m.group(0)), src)
if len(args) > 0:
args = (fix_chars,) + args[1:]
else:
kwargs['transform_source'] = fix_chars
kwargs = compat_kwargs(kwargs)
return super(Vbox7IE, self)._parse_json(
json_string, video_id, *args, **kwargs)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url = 'https://vbox7.com/play:%s' % (video_id,)
now = time.time()
response = self._download_json( response = self._download_json(
'https://www.vbox7.com/ajax/video/nextvideo.php?vid=%s' % video_id, 'https://www.vbox7.com/aj/player/item/options', video_id,
video_id) query={'vid': video_id}, headers={'Referer': url})
# estimate time to which possible `ago` member is relative
now = now + 0.5 * (time.time() - now)
if 'error' in response: if traverse_obj(response, 'error'):
raise ExtractorError( raise ExtractorError(
'%s said: %s' % (self.IE_NAME, response['error']), expected=True) '%s said: %s' % (self.IE_NAME, response['error']), expected=True)
video = response['options'] src_url = traverse_obj(response, ('options', 'src', T(url_or_none))) or ''
title = video['title'] fmt_base = url_basename(src_url).rsplit('.', 1)[0].rsplit('_', 1)[0]
video_url = video['src'] if fmt_base in ('na', 'vn'):
if '/na.mp4' in video_url:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES) self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
uploader = video.get('uploader') ext = determine_ext(src_url)
if ext == 'mpd':
# extract MPD
try:
formats, subtitles = self._extract_mpd_formats_and_subtitles(
src_url, video_id, 'dash', fatal=False)
except KeyError: # fatal doesn't catch this
self.report_warning('Failed to parse MPD manifest')
formats, subtitles = [], {}
elif ext != 'm3u8':
formats = [{
'url': src_url,
}] if src_url else []
subtitles = {}
webpage = self._download_webpage( if src_url:
'http://vbox7.com/play:%s' % video_id, video_id, fatal=None) # possibly extract HLS, based on https://github.com/yt-dlp/yt-dlp/pull/9100
fmt_base = base_url(src_url) + fmt_base
# prepare for _extract_m3u8_formats_and_subtitles()
# hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
hls_formats = self._extract_m3u8_formats(
'{0}.m3u8'.format(fmt_base), video_id, m3u8_id='hls', fatal=False)
formats.extend(hls_formats)
# self._merge_subtitles(hls_subs, target=subtitles)
info = {} # In case MPD/HLS cannot be parsed, or anyway, get mp4 combined
# formats usually provided to Safari, iOS, and old Windows
video = response['options']
resolutions = (1080, 720, 480, 240, 144)
highest_res = traverse_obj(video, (
'highestRes', T(int))) or resolutions[0]
resolutions = traverse_obj(video, (
'resolutions', lambda _, r: highest_res >= int(r) > 0)) or resolutions
mp4_formats = traverse_obj(resolutions, (
Ellipsis, T(lambda res: {
'url': '{0}_{1}.mp4'.format(fmt_base, res),
'format_id': 'http-{0}'.format(res),
'height': res,
})))
# if above formats are flaky, enable the line below
# self._check_formats(mp4_formats, video_id)
formats.extend(mp4_formats)
self._sort_formats(formats)
webpage = self._download_webpage(url, video_id, fatal=False) or ''
if webpage:
info = self._search_json_ld( info = self._search_json_ld(
webpage.replace('"/*@context"', '"@context"'), video_id, webpage.replace('"/*@context"', '"@context"'), video_id,
fatal=False) fatal=False) if webpage else {}
info.update({ if not info.get('title'):
info['title'] = traverse_obj(response, (
'options', 'title', T(txt_or_none))) or self._og_search_title(webpage)
def if_missing(k):
return lambda x: None if k in info else x
info = merge_dicts(info, {
'id': video_id, 'id': video_id,
'title': title, 'formats': formats,
'url': video_url, 'subtitles': subtitles or None,
'uploader': uploader, }, info, traverse_obj(response, ('options', {
'thumbnail': self._proto_relative_url( 'uploader': ('uploader', T(txt_or_none)),
'timestamp': ('ago', T(if_missing('timestamp')), T(lambda t: int(round((now - t) / 60.0)) * 60)),
'duration': ('duration', T(if_missing('duration')), T(float_or_none)),
})))
if 'thumbnail' not in info:
info['thumbnail'] = self._proto_relative_url(
info.get('thumbnail') or self._og_search_thumbnail(webpage), info.get('thumbnail') or self._og_search_thumbnail(webpage),
'http:'), 'https:'),
})
return info return info

View File

@ -6,22 +6,31 @@ import re
import string import string
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_ord,
compat_struct_pack,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_codecs, parse_codecs,
parse_qs,
update_url_query, update_url_query,
urljoin, urljoin,
xpath_element, xpath_element,
xpath_text, xpath_text,
) )
from ..compat import (
compat_b64decode,
compat_ord, def compat_random_choices(population, *args, **kwargs):
compat_struct_pack, # weights=None, *, cum_weights=None, k=1
compat_urlparse, # limited implementation needed here
) weights = args[0] if args else kwargs.get('weights')
assert all(w is None for w in (weights, kwargs.get('cum_weights')))
k = kwargs.get('k', 1)
return ''.join(random.choice(population) for _ in range(k))
class VideaIE(InfoExtractor): class VideaIE(InfoExtractor):
@ -35,6 +44,7 @@ class VideaIE(InfoExtractor):
) )
(?P<id>[^?#&]+) (?P<id>[^?#&]+)
''' '''
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1']
_TESTS = [{ _TESTS = [{
'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ', 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
'md5': '97a7af41faeaffd9f1fc864a7c7e7603', 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
@ -44,6 +54,7 @@ class VideaIE(InfoExtractor):
'title': 'Az őrült kígyász 285 kígyót enged szabadon', 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'duration': 21, 'duration': 21,
'age_limit': 0,
}, },
}, { }, {
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH', 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
@ -54,6 +65,7 @@ class VideaIE(InfoExtractor):
'title': 'Supercars előzés', 'title': 'Supercars előzés',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'duration': 64, 'duration': 64,
'age_limit': 0,
}, },
}, { }, {
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ', 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
@ -64,6 +76,7 @@ class VideaIE(InfoExtractor):
'title': 'Az őrült kígyász 285 kígyót enged szabadon', 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'duration': 21, 'duration': 21,
'age_limit': 0,
}, },
}, { }, {
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
@ -80,11 +93,14 @@ class VideaIE(InfoExtractor):
}] }]
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
@staticmethod @classmethod
def _extract_urls(webpage): def _extract_urls(cls, webpage):
return [url for _, url in re.findall( def yield_urls():
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', for pattern in cls._EMBED_REGEX:
webpage)] for m in re.finditer(pattern, webpage):
yield m.group('url')
return list(yield_urls())
@staticmethod @staticmethod
def rc4(cipher_text, key): def rc4(cipher_text, key):
@ -130,13 +146,13 @@ class VideaIE(InfoExtractor):
for i in range(0, 32): for i in range(0, 32):
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query) query = parse_qs(player_url)
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) random_seed = ''.join(compat_random_choices(string.ascii_letters + string.digits, k=8))
query['_s'] = random_seed query['_s'] = random_seed
query['_t'] = result[:16] query['_t'] = result[:16]
b64_info, handle = self._download_webpage_handle( b64_info, handle = self._download_webpage_handle(
'http://videa.hu/videaplayer_get_xml.php', video_id, query=query) 'http://videa.hu/player/xml', video_id, query=query)
if b64_info.startswith('<?xml'): if b64_info.startswith('<?xml'):
info = self._parse_xml(b64_info, video_id) info = self._parse_xml(b64_info, video_id)
else: else:

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
get_element_by_id, get_element_by_id,
@ -11,6 +12,7 @@ from ..utils import (
strip_or_none, strip_or_none,
unified_strdate, unified_strdate,
urljoin, urljoin,
str_to_int,
) )
@ -35,6 +37,26 @@ class VidLiiIE(InfoExtractor):
'categories': ['News & Politics'], 'categories': ['News & Politics'],
'tags': ['Vidlii', 'Jan', 'Videogames'], 'tags': ['Vidlii', 'Jan', 'Videogames'],
} }
}, {
# HD
'url': 'https://www.vidlii.com/watch?v=2Ng8Abj2Fkl',
'md5': '450e7da379c884788c3a4fa02a3ce1a4',
'info_dict': {
'id': '2Ng8Abj2Fkl',
'ext': 'mp4',
'title': 'test',
'description': 'md5:cc55a86032a7b6b3cbfd0f6b155b52e9',
'thumbnail': 'https://www.vidlii.com/usfi/thmp/2Ng8Abj2Fkl.jpg',
'uploader': 'VidLii',
'uploader_url': 'https://www.vidlii.com/user/VidLii',
'upload_date': '20200927',
'duration': 5,
'view_count': int,
'comment_count': int,
'average_rating': float,
'categories': ['Film & Animation'],
'tags': list,
},
}, { }, {
'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0', 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0',
'only_matching': True, 'only_matching': True,
@ -46,11 +68,32 @@ class VidLiiIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
'https://www.vidlii.com/watch?v=%s' % video_id, video_id) 'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
video_url = self._search_regex( formats = []
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage,
'video url', group='url')
title = self._search_regex( def add_format(format_url, height=None):
height = int(self._search_regex(r'(\d+)\.mp4',
format_url, 'height', default=360))
formats.append({
'url': format_url,
'format_id': '%dp' % height if height else None,
'height': height,
})
sources = re.findall(
r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
webpage)
formats = []
if len(sources) > 1:
add_format(sources[1][1])
self._check_formats(formats, video_id)
if len(sources) > 0:
add_format(sources[0][1])
self._sort_formats(formats)
title = self._html_search_regex(
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage, (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
'title') 'title')
@ -82,9 +125,9 @@ class VidLiiIE(InfoExtractor):
default=None) or self._search_regex( default=None) or self._search_regex(
r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
view_count = int_or_none(self._search_regex( view_count = str_to_int(self._html_search_regex(
(r'<strong>(\d+)</strong> views', (r'<strong>([\d,.]+)</strong> views',
r'Views\s*:\s*<strong>(\d+)</strong>'), r'Views\s*:\s*<strong>([\d,.]+)</strong>'),
webpage, 'view count', fatal=False)) webpage, 'view count', fatal=False))
comment_count = int_or_none(self._search_regex( comment_count = int_or_none(self._search_regex(
@ -109,7 +152,7 @@ class VidLiiIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'formats': formats,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,

View File

@ -673,8 +673,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise raise
if '//player.vimeo.com/video/' in url: if '//player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex( config = self._search_json(
r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id) r'\b(?:playerC|c)onfig\s*=', webpage, 'info section', video_id)
if config.get('view') == 4: if config.get('view') == 4:
config = self._verify_player_video_password( config = self._verify_player_video_password(
redirect_url, video_id, headers) redirect_url, video_id, headers)

View File

@ -4,20 +4,28 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_chr from ..compat import (
compat_chr,
compat_zip as zip,
)
from ..utils import ( from ..utils import (
clean_html,
decode_packed_codes, decode_packed_codes,
determine_ext, determine_ext,
ExtractorError, ExtractorError,
get_element_by_id,
int_or_none, int_or_none,
js_to_json, merge_dicts,
T,
traverse_obj,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58 # based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
def aa_decode(aa_code): def aa_decode(aa_code):
symbol_table = [ symbol_table = (
('7', '((゚ー゚) + (o^_^o))'), ('7', '((゚ー゚) + (o^_^o))'),
('6', '((o^_^o) +(o^_^o))'), ('6', '((o^_^o) +(o^_^o))'),
('5', '((゚ー゚) + (゚Θ゚))'), ('5', '((゚ー゚) + (゚Θ゚))'),
@ -26,84 +34,180 @@ def aa_decode(aa_code):
('3', '(o^_^o)'), ('3', '(o^_^o)'),
('1', '(゚Θ゚)'), ('1', '(゚Θ゚)'),
('0', '(c^_^o)'), ('0', '(c^_^o)'),
] ('+', ''),
)
delim = '(゚Д゚)[゚ε゚]+' delim = '(゚Д゚)[゚ε゚]+'
ret = ''
for aa_char in aa_code.split(delim): def chr_from_code(c):
for val, pat in symbol_table: for val, pat in symbol_table:
aa_char = aa_char.replace(pat, val) c = c.replace(pat, val)
aa_char = aa_char.replace('+ ', '') if c.startswith(('u', 'U')):
m = re.match(r'^\d+', aa_char) base = 16
if m: c = c[1:]
ret += compat_chr(int(m.group(0), 8))
else: else:
m = re.match(r'^u([\da-f]+)', aa_char) base = 10
if m: c = int_or_none(c, base=base)
ret += compat_chr(int(m.group(1), 16)) return '' if c is None else compat_chr(c)
return ret
return ''.join(
chr_from_code(aa_char)
for aa_char in aa_code.split(delim))
class XFileShareIE(InfoExtractor): class XFileShareIE(InfoExtractor):
_SITES = ( _SITES = (
(r'aparat\.cam', 'Aparat'), # status check 2024-02: site availability, G site: search
(r'clipwatching\.com', 'ClipWatching'), (r'aparat\.cam', 'Aparat'), # Cloudflare says host error 522, apparently changed to wolfstreeam.tv
(r'gounlimited\.to', 'GoUnlimited'), (r'filemoon\.sx/.', 'FileMoon'),
(r'govid\.me', 'GoVid'), (r'gounlimited\.to', 'GoUnlimited'), # no media pages listed
(r'holavid\.com', 'HolaVid'), (r'govid\.me', 'GoVid'), # no media pages listed
(r'streamty\.com', 'Streamty'), (r'highstream\.tv', 'HighStream'), # clipwatching.com redirects here
(r'thevideobee\.to', 'TheVideoBee'), (r'holavid\.com', 'HolaVid'), # Cloudflare says host error 522
(r'uqload\.com', 'Uqload'), # (r'streamty\.com', 'Streamty'), # no media pages listed, connection timeout
(r'vidbom\.com', 'VidBom'), # (r'thevideobee\.to', 'TheVideoBee'), # no pages listed, refuses connection
(r'vidlo\.us', 'vidlo'), (r'uqload\.to', 'Uqload'), # .com, .co redirect here
(r'vidlocker\.xyz', 'VidLocker'), (r'(?:vedbam\.xyz|vadbam.net)', 'V?dB?m'), # vidbom.com redirects here, but no valid media pages listed
(r'vidshare\.tv', 'VidShare'), (r'vidlo\.us', 'vidlo'), # no valid media pages listed
(r'vup\.to', 'VUp'), (r'vidlocker\.xyz', 'VidLocker'), # no media pages listed
(r'(?:w\d\.)?viidshar\.com', 'VidShare'), # vidshare.tv redirects here
# (r'vup\.to', 'VUp'), # domain not found
(r'wolfstream\.tv', 'WolfStream'), (r'wolfstream\.tv', 'WolfStream'),
(r'xvideosharing\.com', 'XVideoSharing'), (r'xvideosharing\.com', 'XVideoSharing'), # just started showing 'maintenance mode'
) )
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1]) IE_DESC = 'XFileShare-based sites: %s' % ', '.join(list(zip(*_SITES))[1])
_VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
% '|'.join(site for site in list(zip(*_SITES))[0])) % '|'.join(site for site in list(zip(*_SITES))[0]))
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' % '|'.join(site for site in list(zip(*_SITES))[0])]
_FILE_NOT_FOUND_REGEXES = ( _FILE_NOT_FOUND_REGEXES = (
r'>(?:404 - )?File Not Found<', r'>(?:404 - )?File Not Found<',
r'>The file was removed by administrator<', r'>The file was removed by administrator<',
) )
_TITLE_REGEXES = (
r'style="z-index: [0-9]+;">([^<]+)</span>',
r'<td nowrap>([^<]+)</td>',
r'h4-fine[^>]*>([^<]+)<',
r'>Watch (.+)[ <]',
r'<h2 class="video-page-head">([^<]+)</h2>',
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to (dead)
r'title\s*:\s*"([^"]+)"', # govid.me
)
_SOURCE_URL_REGEXES = (
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
)
_THUMBNAIL_REGEXES = (
r'<video[^>]+poster="([^"]+)"',
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
)
_TESTS = [{ _TESTS = [{
'url': 'http://xvideosharing.com/fq65f94nd2ve', 'note': 'link in `sources`',
'md5': '4181f63957e8fe90ac836fa58dc3c8a6', 'url': 'https://uqload.to/dcsu06gdb45o',
'md5': '7f8db187b254379440bf4fcad094ae86',
'info_dict': { 'info_dict': {
'id': 'fq65f94nd2ve', 'id': 'dcsu06gdb45o',
'ext': 'mp4', 'ext': 'mp4',
'title': 'sample', 'title': 'f2e31015957e74c8c8427982e161c3fc mp4',
'thumbnail': r're:http://.*\.jpg', 'thumbnail': r're:https://.*\.jpg'
},
'params': {
'nocheckcertificate': True,
},
'expected_warnings': ['Unable to extract JWPlayer data'],
}, {
'note': 'link in decoded `sources`',
'url': 'https://xvideosharing.com/1tlg6agrrdgc',
'md5': '2608ce41932c1657ae56258a64e647d9',
'info_dict': {
'id': '1tlg6agrrdgc',
'ext': 'mp4',
'title': '0121',
'thumbnail': r're:https?://.*\.jpg',
},
'skip': 'This server is in maintenance mode.',
}, {
'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
'url': 'https://filemoon.sx/e/dw40rxrzruqz',
'md5': '5a713742f57ac4aef29b74733e8dda01',
'info_dict': {
'id': 'dw40rxrzruqz',
'title': 'dw40rxrzruqz',
'ext': 'mp4'
},
}, {
'note': 'JWPlayer link in un-p,a,c,k,e,d JS',
'url': 'https://vadbam.net/6lnbkci96wly.html',
'md5': 'a1616800076177e2ac769203957c54bc',
'info_dict': {
'id': '6lnbkci96wly',
'title': 'Heart Crime S01 E03 weciima autos',
'ext': 'mp4'
},
}, {
'note': 'JWPlayer link in clear',
'url': 'https://w1.viidshar.com/nnibe0xf0h79.html',
'md5': 'f0a580ce9df06cc61b4a5c979d672367',
'info_dict': {
'id': 'nnibe0xf0h79',
'title': 'JaGa 68ar',
'ext': 'mp4'
},
'params': {
'skip_download': 'ffmpeg',
},
'expected_warnings': ['hlsnative has detected features it does not support'],
}, {
'note': 'JWPlayer link in clear',
'url': 'https://wolfstream.tv/a3drtehyrg52.html',
'md5': '1901d86a79c5e0c6a51bdc9a4cfd3769',
'info_dict': {
'id': 'a3drtehyrg52',
'title': 'NFL 2023 W04 DET@GB',
'ext': 'mp4'
}, },
}, { }, {
'url': 'https://aparat.cam/n4d6dh0wvlpr', 'url': 'https://aparat.cam/n4d6dh0wvlpr',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://wolfstream.tv/nthme29v9u2x', 'url': 'https://uqload.to/ug5somm0ctnk.html',
'only_matching': True,
}, {
'url': 'https://highstream.tv/2owiyz3sjoux',
'only_matching': True,
}, {
'url': 'https://vedbam.xyz/6lnbkci96wly.html',
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod @classmethod
def _extract_urls(webpage): def _extract_urls(cls, webpage):
return [
mobj.group('url') def yield_urls():
for mobj in re.finditer( for regex in cls._EMBED_REGEX:
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1' for mobj in re.finditer(regex, webpage):
% '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]), yield mobj.group('url')
webpage)]
return list(yield_urls())
def _real_extract(self, url): def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups() host, video_id = self._match_valid_url(url).group('host', 'id')
url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id) url = 'https://%s/%s' % (
host,
'embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
container_div = get_element_by_id('container', webpage) or webpage
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): if self._search_regex(
r'>This server is in maintenance mode\.', container_div,
'maint error', group=0, default=None):
raise ExtractorError(clean_html(container_div), expected=True)
if self._search_regex(
self._FILE_NOT_FOUND_REGEXES, container_div,
'missing video error', group=0, default=None):
raise ExtractorError('Video %s does not exist' % video_id, expected=True) raise ExtractorError('Video %s does not exist' % video_id, expected=True)
fields = self._hidden_inputs(webpage) fields = self._hidden_inputs(webpage)
@ -122,59 +226,43 @@ class XFileShareIE(InfoExtractor):
'Content-type': 'application/x-www-form-urlencoded', 'Content-type': 'application/x-www-form-urlencoded',
}) })
title = (self._search_regex( title = (
(r'style="z-index: [0-9]+;">([^<]+)</span>', self._search_regex(self._TITLE_REGEXES, webpage, 'title', default=None)
r'<td nowrap>([^<]+)</td>', or self._og_search_title(webpage, default=None)
r'h4-fine[^>]*>([^<]+)<', or video_id).strip()
r'>Watch (.+)[ <]',
r'<h2 class="video-page-head">([^<]+)</h2>',
r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
r'title\s*:\s*"([^"]+)"'), # govid.me
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or video_id).strip()
obf_code = True
while obf_code:
for regex, func in ( for regex, func in (
(r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes), (r'(?s)(?<!-)\b(eval\(function\(p,a,c,k,e,d\)\{(?:(?!</script>).)+\)\))',
decode_packed_codes),
(r'(゚.+)', aa_decode)): (r'(゚.+)', aa_decode)):
obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None) obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
if obf_code: if obf_code:
webpage = webpage.replace(obf_code, func(obf_code)) webpage = webpage.replace(obf_code, func(obf_code))
break
formats = [] jwplayer_data = self._find_jwplayer_data(
webpage.replace(r'\'', '\''), video_id)
result = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False,
m3u8_id='hls', mpd_id='dash')
jwplayer_data = self._search_regex( if not traverse_obj(result, 'formats'):
[
r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
], webpage,
'jwplayer data', default=None)
if jwplayer_data: if jwplayer_data:
jwplayer_data = self._parse_json( self.report_warning(
jwplayer_data.replace(r"\'", "'"), video_id, js_to_json) 'Failed to extract JWPlayer formats', video_id=video_id)
if jwplayer_data: urls = set()
formats = self._parse_jwplayer_data( for regex in self._SOURCE_URL_REGEXES:
jwplayer_data, video_id, False,
m3u8_id='hls', mpd_id='dash')['formats']
if not formats:
urls = []
for regex in (
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
for mobj in re.finditer(regex, webpage): for mobj in re.finditer(regex, webpage):
video_url = mobj.group('url') urls.add(mobj.group('url'))
if video_url not in urls:
urls.append(video_url)
sources = self._search_regex( sources = self._search_regex(
r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None) r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
if sources: urls.update(traverse_obj(sources, (T(lambda s: self._parse_json(s, video_id)), Ellipsis)))
urls.extend(self._parse_json(sources, video_id))
formats = [] formats = []
for video_url in urls: for video_url in traverse_obj(urls, (Ellipsis, T(url_or_none))):
if determine_ext(video_url) == 'm3u8': if determine_ext(video_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', video_url, video_id, 'mp4',
@ -185,17 +273,19 @@ class XFileShareIE(InfoExtractor):
'url': video_url, 'url': video_url,
'format_id': 'sd', 'format_id': 'sd',
}) })
self._sort_formats(formats) result = {'formats': formats}
self._sort_formats(result['formats'])
thumbnail = self._search_regex( thumbnail = self._search_regex(
[ self._THUMBNAIL_REGEXES, webpage, 'thumbnail', default=None)
r'<video[^>]+poster="([^"]+)"',
r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
], webpage, 'thumbnail', default=None)
return { if not (title or result.get('title')):
title = self._generic_title(url) or video_id
return merge_dicts(result, {
'id': video_id, 'id': video_id,
'title': title, 'title': title or None,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'formats': formats, 'http_headers': {'Referer': url}
} })

View File

@ -106,6 +106,25 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
}, { }, {
'url': 'http://music.yandex.com/album/540508/track/4878838', 'url': 'http://music.yandex.com/album/540508/track/4878838',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://music.yandex.ru/album/16302456/track/85430762',
'md5': '11b8d50ab03b57738deeaadf661a0a48',
'info_dict': {
'id': '85430762',
'ext': 'mp3',
'abr': 128,
'title': 'Haddadi Von Engst, Phonic Youth, Super Flu - Til The End (Super Flu Remix)',
'filesize': int,
'duration': 431.14,
'track': 'Til The End (Super Flu Remix)',
'album': 'Til The End',
'album_artist': 'Haddadi Von Engst, Phonic Youth',
'artist': 'Haddadi Von Engst, Phonic Youth, Super Flu',
'release_year': 2021,
'genre': 'house',
'disc_number': 1,
'track_number': 2,
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -116,10 +135,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
'track', tld, url, track_id, 'Downloading track JSON', 'track', tld, url, track_id, 'Downloading track JSON',
{'track': '%s:%s' % (track_id, album_id)})['track'] {'track': '%s:%s' % (track_id, album_id)})['track']
track_title = track['title'] track_title = track['title']
track_version = track.get('version')
if track_version:
track_title = '%s (%s)' % (track_title, track_version)
download_data = self._download_json( download_data = self._download_json(
'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id), 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
track_id, 'Downloading track location url JSON', track_id, 'Downloading track location url JSON',
query={'hq': 1},
headers={'X-Retpath-Y': url}) headers={'X-Retpath-Y': url})
fd_data = self._download_json( fd_data = self._download_json(

View File

@ -1,20 +1,38 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import re import re
from time import sleep
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
extract_attributes, extract_attributes,
ExtractorError,
get_element_by_class,
get_element_by_id,
int_or_none, int_or_none,
str_to_int, merge_dicts,
parse_count,
parse_qs,
T,
traverse_obj,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin,
) )
class YouPornIE(InfoExtractor): class YouPornIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' _VALID_URL = (
r'youporn:(?P<id>\d+)',
r'''(?x)
https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)
(?:/(?:(?P<display_id>[^/?#&]+)/?)?)?(?:[#?]|$)
'''
)
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)']
_TESTS = [{ _TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2', 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@ -34,7 +52,7 @@ class YouPornIE(InfoExtractor):
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
}, },
'skip': 'This video has been disabled', 'skip': 'This video has been deactivated',
}, { }, {
# Unknown uploader # Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@ -66,57 +84,104 @@ class YouPornIE(InfoExtractor):
}, { }, {
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/', 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.youporn.com/watch/16290308/tinderspecial-trailer1/',
'info_dict': {
'id': '16290308',
'age_limit': 18,
'categories': [],
'description': None, # SEO spam using title removed
'display_id': 'tinderspecial-trailer1',
'duration': 298.0,
'ext': 'mp4',
'upload_date': '20201123',
'uploader': 'Ersties',
'tags': [],
'thumbnail': 'https://fi1.ypncdn.com/m=eaSaaTbWx/202011/23/16290308/original/3.jpg',
'timestamp': 1606147564,
'title': 'Tinder In Real Life',
'view_count': int,
}
}] }]
@staticmethod @classmethod
def _extract_urls(webpage): def _extract_urls(cls, webpage):
return re.findall( def yield_urls():
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)', for p in cls._EMBED_REGEX:
webpage) for m in re.finditer(p, webpage):
yield m.group('url')
return list(yield_urls())
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) # A different video ID (data-video-id) is hidden in the page but
video_id = mobj.group('id') # never seems to be used
display_id = mobj.group('display_id') or video_id video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
url = 'http://www.youporn.com/watch/%s' % (video_id,)
webpage = self._download_webpage(
url, video_id, headers={'Cookie': 'age_verified=1'})
definitions = self._download_json( watchable = self._search_regex(
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id, r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
display_id) webpage, 'watchability', default=None)
if not watchable:
msg = re.split(r'\s{4}', clean_html(get_element_by_id(
'mainContent', webpage)) or '')[0]
raise ExtractorError(
('%s says: %s' % (self.IE_NAME, msg))
if msg else 'Video unavailable: no reason found',
expected=True)
# internal ID ?
# video_id = extract_attributes(watchable).get('data-video-id')
playervars = self._search_json(
r'\bplayervars\s*:', webpage, 'playervars', video_id)
def get_fmt(x):
v_url = url_or_none(x.get('videoUrl'))
if v_url:
x['videoUrl'] = v_url
return (x['format'], x)
defs_by_format = dict(traverse_obj(playervars, (
'mediaDefinitions', lambda _, v: v.get('format'), T(get_fmt))))
def get_format_data(f):
if f not in defs_by_format:
return []
return self._download_json(
defs_by_format[f]['videoUrl'], video_id, '{0}-formats'.format(f))
formats = [] formats = []
for definition in definitions: # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
if not isinstance(definition, dict): for hls_url in traverse_obj(
continue get_format_data('hls'),
video_url = url_or_none(definition.get('videoUrl')) (lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'),
if not video_url: (Ellipsis, 'videoUrl')):
continue formats.extend(self._extract_m3u8_formats(
f = { hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls',
'url': video_url, entry_protocol='m3u8_native'))
'filesize': int_or_none(definition.get('videoSize')),
} for f in traverse_obj(get_format_data('mp4'), (
height = int_or_none(definition.get('quality')) lambda _, v: v.get('videoUrl'), {
'url': ('videoUrl', T(url_or_none)),
'filesize': ('videoSize', T(int_or_none)),
'height': ('quality', T(int_or_none)),
}, T(lambda x: x.get('videoUrl') and x))):
# Video URL's path looks like this: # Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4 # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata # We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url) mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', f['videoUrl'])
if mobj: if mobj:
if not height: if not f.get('height'):
height = int(mobj.group('height')) f['height'] = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate')) f['tbr'] = int(mobj.group('bitrate'))
f.update({ f['format_id'] = '%dp-%dk' % (f['height'], f['tbr'])
'format_id': '%dp-%dk' % (height, bitrate),
'tbr': bitrate,
})
f['height'] = height
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
title = self._html_search_regex( title = self._html_search_regex(
r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>', r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
webpage, 'title', default=None) or self._og_search_title( webpage, 'title', default=None) or self._og_search_title(
@ -131,6 +196,8 @@ class YouPornIE(InfoExtractor):
thumbnail = self._search_regex( thumbnail = self._search_regex(
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
webpage, 'thumbnail', fatal=False, group='thumbnail') webpage, 'thumbnail', fatal=False, group='thumbnail')
duration = traverse_obj(playervars, ('duration', T(int_or_none)))
if duration is None:
duration = int_or_none(self._html_search_meta( duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'duration', fatal=False)) 'video:duration', webpage, 'duration', fatal=False))
@ -148,11 +215,11 @@ class YouPornIE(InfoExtractor):
view_count = None view_count = None
views = self._search_regex( views = self._search_regex(
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage, r'(<div\s[^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>',
'views', default=None) webpage, 'views', default=None)
if views: if views:
view_count = str_to_int(extract_attributes(views).get('data-value')) view_count = parse_count(extract_attributes(views).get('data-value'))
comment_count = str_to_int(self._search_regex( comment_count = parse_count(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)', r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None)) webpage, 'comment count', default=None))
@ -168,7 +235,10 @@ class YouPornIE(InfoExtractor):
r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>', r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
'tags') 'tags')
return { data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) or {}
data.pop('url', None)
result = merge_dicts(data, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -183,4 +253,442 @@ class YouPornIE(InfoExtractor):
'tags': tags, 'tags': tags,
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
} })
# Remove promotional non-description
if result.get('description', '').startswith(
'Watch %s online' % (result['title'],)):
del result['description']
return result
class YouPornListBase(InfoExtractor):
# pattern in '.title-text' element of page section containing videos
_PLAYLIST_TITLEBAR_RE = r'\s+[Vv]ideos\s*$'
_PAGE_RETRY_COUNT = 0 # ie, no retry
_PAGE_RETRY_DELAY = 2 # seconds
def _get_next_url(self, url, pl_id, html):
return urljoin(url, self._search_regex(
r'''<a\s[^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
get_element_by_id('next', html) or '', 'next page',
group='url', default=None))
@classmethod
def _get_title_from_slug(cls, title_slug):
return re.sub(r'[_-]', ' ', title_slug)
def _entries(self, url, pl_id, html=None, page_num=None):
# separates page sections
PLAYLIST_SECTION_RE = (
r'''<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?title-bar(?:\s+[\w$-]+|\s)*\1[^>]*>'''
)
# contains video link
VIDEO_URL_RE = r'''(?x)
<div\s[^>]*\bdata-video-id\s*=\s*('|")\d+\1[^>]*>\s*
(?:<div\b[\s\S]+?</div>\s*)*
<a\s[^>]*\bhref\s*=\s*('|")(?P<url>(?:(?!\2)[^>])+)\2
'''
def yield_pages(url, html=html, page_num=page_num):
fatal = not html
for pnum in itertools.count(start=page_num or 1):
if not html:
html = self._download_webpage(
url, pl_id, note='Downloading page %d' % pnum,
fatal=fatal)
if not html:
break
fatal = False
yield (url, html, pnum)
# explicit page: extract just that page
if page_num is not None:
break
next_url = self._get_next_url(url, pl_id, html)
if not next_url or next_url == url:
break
url, html = next_url, None
def retry_page(msg, tries_left, page_data):
if tries_left <= 0:
return
self.report_warning(msg, pl_id)
sleep(self._PAGE_RETRY_DELAY)
return next(
yield_pages(page_data[0], page_num=page_data[2]), None)
def yield_entries(html):
for frag in re.split(PLAYLIST_SECTION_RE, html):
if not frag:
continue
t_text = get_element_by_class('title-text', frag or '')
if not (t_text and re.search(self._PLAYLIST_TITLEBAR_RE, t_text)):
continue
for m in re.finditer(VIDEO_URL_RE, frag):
video_url = urljoin(url, m.group('url'))
if video_url:
yield self.url_result(video_url)
last_first_url = None
for page_data in yield_pages(url, html=html, page_num=page_num):
# page_data: url, html, page_num
first_url = None
tries_left = self._PAGE_RETRY_COUNT + 1
while tries_left > 0:
tries_left -= 1
for from_ in yield_entries(page_data[1]):
# may get the same page twice instead of empty page
# or (site bug) intead of actual next page
if not first_url:
first_url = from_['url']
if first_url == last_first_url:
# sometimes (/porntags/) the site serves the previous page
# instead but may provide the correct page after a delay
page_data = retry_page(
'Retrying duplicate page...', tries_left, page_data)
if page_data:
first_url = None
break
continue
yield from_
else:
if not first_url and 'no-result-paragarph1' in page_data[1]:
page_data = retry_page(
'Retrying empty page...', tries_left, page_data)
if page_data:
continue
else:
# success/failure
break
# may get an infinite (?) sequence of empty pages
if not first_url:
break
last_first_url = first_url
def _real_extract(self, url, html=None):
# exceptionally, id may be None
m_dict = self._match_valid_url(url).groupdict()
pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
qs = parse_qs(url)
for q, v in qs.items():
if v:
qs[q] = v[-1]
else:
del qs[q]
base_id = pl_id or 'YouPorn'
title = self._get_title_from_slug(base_id)
if page_type:
title = '%s %s' % (page_type.capitalize(), title)
base_id = [base_id.lower()]
if sort is None:
title += ' videos'
else:
title = '%s videos by %s' % (title, re.sub(r'[_-]', ' ', sort))
base_id.append(sort)
if qs:
ps = ['%s=%s' % item for item in sorted(qs.items())]
title += ' (%s)' % ','.join(ps)
base_id.extend(ps)
pl_id = '/'.join(base_id)
return self.playlist_result(
self._entries(url, pl_id, html=html,
page_num=int_or_none(qs.get('page'))),
playlist_id=pl_id, playlist_title=title)
class YouPornCategoryIE(YouPornListBase):
IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>category)/(?P<id>[^/?#&]+)
(?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$)
'''
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/category/lingerie/popular/',
'info_dict': {
'id': 'lingerie/popular',
'title': 'Category lingerie videos by popular',
},
'playlist_mincount': 39,
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/category/lingerie/duration/?min_minutes=10',
'info_dict': {
'id': 'lingerie/duration/min_minutes=10',
'title': 'Category lingerie videos by duration (min_minutes=10)',
},
'playlist_maxcount': 30,
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/category/lingerie/popular?page=1',
'info_dict': {
'id': 'lingerie/popular/page=1',
'title': 'Category lingerie videos by popular (page=1)',
},
'playlist_count': 30,
}]
class YouPornChannelIE(YouPornListBase):
IE_DESC = 'YouPorn channel, with sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>channel)/(?P<id>[^/?#&]+)
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
'''
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/channel/x-feeds/',
'info_dict': {
'id': 'x-feeds',
'title': 'Channel X-Feeds videos',
},
'playlist_mincount': 37,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
'info_dict': {
'id': 'x-feeds/duration/page=1',
'title': 'Channel X-Feeds videos by duration (page=1)',
},
'playlist_count': 24,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return re.sub(r'_', ' ', title_slug).title()
class YouPornCollectionIE(YouPornListBase):
IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>collection)s/videos/(?P<id>\d+)
(?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+in\s'
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/collections/videos/33044251/',
'info_dict': {
'id': '33044251',
'title': 'Collection Sexy Lips videos',
'uploader': 'ph-littlewillyb',
},
'playlist_mincount': 50,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
'info_dict': {
'id': '33044251/time/page=1',
'title': 'Collection Sexy Lips videos by time (page=1)',
'uploader': 'ph-littlewillyb',
},
'playlist_count': 20,
}]
def _real_extract(self, url):
pl_id = self._match_id(url)
html = self._download_webpage(url, pl_id)
playlist = super(YouPornCollectionIE, self)._real_extract(url, html=html)
infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
'collection-infos', html)) or '')
title, uploader = self._search_regex(
r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
return merge_dicts({
'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
'uploader': uploader,
}, playlist) if title else playlist
class YouPornTagIE(YouPornListBase):
IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
porn(?P<type>tag)s/(?P<id>[^/?#&]+)
(?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+tagged\s'
_PAGE_RETRY_COUNT = 1
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/porntags/austrian',
'info_dict': {
'id': 'austrian',
'title': 'Tag austrian videos',
},
'playlist_mincount': 35,
'expected_warnings': ['Retrying duplicate page'],
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
'info_dict': {
'id': 'austrian/duration/min_minutes=10',
'title': 'Tag austrian videos by duration (min_minutes=10)',
},
# number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
# or more, varying with number of ads; let's set max as 9x4
# NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
'playlist_maxcount': 32,
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/porntags/austrian/?page=1',
'info_dict': {
'id': 'austrian/page=1',
'title': 'Tag austrian videos (page=1)',
},
'playlist_mincount': 32,
'playlist_maxcount': 34,
'expected_warnings': ['Retrying duplicate page', 'Retrying empty page'],
}]
# YP tag navigation is broken, loses sort
def _get_next_url(self, url, pl_id, html):
next_url = super(YouPornTagIE, self)._get_next_url(url, pl_id, html)
if next_url:
n = self._match_valid_url(next_url)
if n:
s = n.groupdict().get('sort')
if s:
u = self._match_valid_url(url)
if u:
u = u.groupdict().get('sort')
if s and not u:
n = n.end('sort')
next_url = next_url[:n] + '/' + u + next_url[n:]
return next_url
class YouPornStarIE(YouPornListBase):
IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?P<type>pornstar)/(?P<id>[^/?#&]+)
(?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'^\s*Videos\s+[fF]eaturing\s'
_TESTS = [{
'note': 'Full list with pagination',
'url': 'https://www.youporn.com/pornstar/daynia/',
'info_dict': {
'id': 'daynia',
'title': 'Pornstar Daynia videos',
'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
},
'playlist_mincount': 45,
}, {
'note': 'Single page of full list (no filters here)',
'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
'info_dict': {
'id': 'daynia/page=1',
'title': 'Pornstar Daynia videos (page=1)',
'description': 're:.{180,}',
},
'playlist_count': 26,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return re.sub(r'_', ' ', title_slug).title()
def _real_extract(self, url):
pl_id = self._match_id(url)
html = self._download_webpage(url, pl_id)
playlist = super(YouPornStarIE, self)._real_extract(url, html=html)
INFO_ELEMENT_RE = r'''(?x)
<div\s[^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
(?P<info>[\s\S]+?)(?:</div>\s*){6,}
'''
infos = self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default='')
if infos:
infos = re.sub(
r'(?:\s*nl=nl)+\s*', ' ',
re.sub(r'(?u)\s+', ' ', clean_html(
re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '')
return merge_dicts({
'description': infos.strip() or None,
}, playlist)
class YouPornVideosIE(YouPornListBase):
IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination'
_VALID_URL = r'''(?x)
https?://(?:www\.)?youporn\.com/
(?:(?P<id>browse)/)?
(?P<sort>(?(id)
(?:duration|rating|time|views)|
(?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
(?:[/#?]|$)
'''
_PLAYLIST_TITLEBAR_RE = r'\s+(?:[Vv]ideos|VIDEOS)\s*$'
_TESTS = [{
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/',
'info_dict': {
'id': 'youporn',
'title': 'YouPorn videos',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/recommended',
'info_dict': {
'id': 'youporn/recommended',
'title': 'YouPorn videos by recommended',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/top_rated',
'info_dict': {
'id': 'youporn/top_rated',
'title': 'YouPorn videos by top rated',
},
'only_matching': True,
}, {
'note': 'Full list with pagination (too long for test)',
'url': 'https://www.youporn.com/browse/time',
'info_dict': {
'id': 'browse/time',
'title': 'YouPorn videos by time',
},
'only_matching': True,
}, {
'note': 'Filtered paginated list with single page result',
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2',
'info_dict': {
'id': 'youporn/most_favorited/max_minutes=2/res=VR',
'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)',
},
'playlist_mincount': 10,
'playlist_maxcount': 28,
}, {
'note': 'Filtered paginated list with several pages',
'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5',
'info_dict': {
'id': 'youporn/most_favorited/max_minutes=5/res=VR',
'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)',
},
'playlist_mincount': 45,
}, {
'note': 'Single page of full list',
'url': 'https://www.youporn.com/browse/time?page=1',
'info_dict': {
'id': 'browse/time/page=1',
'title': 'YouPorn videos by time (page=1)',
},
'playlist_count': 36,
}]
@staticmethod
def _get_title_from_slug(title_slug):
return 'YouPorn' if title_slug == 'browse' else title_slug

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import collections
import itertools import itertools
import json import json
import os.path import os.path
@ -23,10 +24,10 @@ from ..compat import (
) )
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..utils import ( from ..utils import (
ExtractorError,
clean_html, clean_html,
dict_get, dict_get,
error_to_compat_str, error_to_compat_str,
ExtractorError,
float_or_none, float_or_none,
extract_attributes, extract_attributes,
get_element_by_attribute, get_element_by_attribute,
@ -36,7 +37,9 @@ from ..utils import (
LazyList, LazyList,
merge_dicts, merge_dicts,
mimetype2ext, mimetype2ext,
NO_DEFAULT,
parse_codecs, parse_codecs,
parse_count,
parse_duration, parse_duration,
parse_qs, parse_qs,
qualities, qualities,
@ -44,7 +47,9 @@ from ..utils import (
smuggle_url, smuggle_url,
str_or_none, str_or_none,
str_to_int, str_to_int,
T,
traverse_obj, traverse_obj,
try_call,
try_get, try_get,
txt_or_none, txt_or_none,
unescapeHTML, unescapeHTML,
@ -260,16 +265,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
cookies = self._get_cookies('https://www.youtube.com/') cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'): if cookies.get('__Secure-3PSID'):
return return
consent_id = None socs = cookies.get('SOCS')
consent = cookies.get('CONSENT') if socs and not socs.value.startswith('CAA'): # not consented
if consent:
if 'YES' in consent.value:
return return
consent_id = self._search_regex( self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
r'PENDING\+(\d+)', consent.value, 'consent', default=None)
if not consent_id:
consent_id = random.randint(100, 999)
self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
def _real_initialize(self): def _real_initialize(self):
self._initialize_consent() self._initialize_consent()
@ -1253,7 +1252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'IMG 3456', 'title': 'IMG 3456',
'description': '', 'description': '',
'upload_date': '20170613', 'upload_date': '20170613',
'uploader': 'ElevageOrVert', 'uploader': "l'Or Vert asbl",
'uploader_id': '@ElevageOrVert', 'uploader_id': '@ElevageOrVert',
}, },
'params': { 'params': {
@ -1466,6 +1465,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._code_cache = {} self._code_cache = {}
self._player_cache = {} self._player_cache = {}
# *ytcfgs, webpage=None
def _extract_player_url(self, *ytcfgs, **kw_webpage):
if ytcfgs and not isinstance(ytcfgs[0], dict):
webpage = kw_webpage.get('webpage') or ytcfgs[0]
if webpage:
player_url = self._search_regex(
r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
webpage or '', 'player URL', fatal=False)
if player_url:
ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
return traverse_obj(
ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
def _download_player_url(self, video_id, fatal=False):
res = self._download_webpage(
'https://www.youtube.com/iframe_api',
note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
player_version = self._search_regex(
r'player\\?/([0-9a-fA-F]{8})\\?/', res or '', 'player version', fatal=fatal,
default=NO_DEFAULT if res else None)
if player_version:
return 'https://www.youtube.com/s/player/{0}/player_ias.vflset/en_US/base.js'.format(player_version)
def _signature_cache_id(self, example_sig): def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """ """ Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
@ -1480,46 +1503,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot identify player %r' % player_url) raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id') return id_m.group('id')
def _get_player_code(self, video_id, player_url, player_id=None): def _load_player(self, video_id, player_url, fatal=True, player_id=None):
if not player_id: if not player_id:
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
if player_id not in self._code_cache: if player_id not in self._code_cache:
self._code_cache[player_id] = self._download_webpage( code = self._download_webpage(
player_url, video_id, player_url, video_id, fatal=fatal,
note='Downloading player ' + player_id, note='Downloading player ' + player_id,
errnote='Download of %s failed' % player_url) errnote='Download of %s failed' % player_url)
return self._code_cache[player_id] if code:
self._code_cache[player_id] = code
return self._code_cache[player_id] if fatal else self._code_cache.get(player_id)
def _extract_signature_function(self, video_id, player_url, example_sig): def _extract_signature_function(self, video_id, player_url, example_sig):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
# Read from filesystem cache # Read from filesystem cache
func_id = 'js_%s_%s' % ( func_id = 'js_{0}_{1}'.format(
player_id, self._signature_cache_id(example_sig)) player_id, self._signature_cache_id(example_sig))
assert os.path.basename(func_id) == func_id assert os.path.basename(func_id) == func_id
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) self.write_debug('Extracting signature function {0}'.format(func_id))
if cache_spec is not None: cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
if not cache_spec:
code = self._load_player(video_id, player_url, player_id)
if code:
res = self._parse_sig_js(code)
test_string = ''.join(map(compat_chr, range(len(example_sig))))
cache_spec = [ord(c) for c in res(test_string)]
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
return lambda s: ''.join(s[i] for i in cache_spec) return lambda s: ''.join(s[i] for i in cache_spec)
code = self._get_player_code(video_id, player_url, player_id)
res = self._parse_sig_js(code)
test_string = ''.join(map(compat_chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
return res
def _print_sig_code(self, func, example_sig): def _print_sig_code(self, func, example_sig):
if not self.get_param('youtube_print_sig_code'):
return
def gen_sig_code(idxs): def gen_sig_code(idxs):
def _genslice(start, end, step): def _genslice(start, end, step):
starts = '' if start == 0 else str(start) starts = '' if start == 0 else str(start)
ends = (':%d' % (end + step)) if end + step >= 0 else ':' ends = (':%d' % (end + step)) if end + step >= 0 else ':'
steps = '' if step == 1 else (':%d' % step) steps = '' if step == 1 else (':%d' % step)
return 's[%s%s%s]' % (starts, ends, steps) return 's[{0}{1}{2}]'.format(starts, ends, steps)
step = None step = None
# Quelch pyflakes warnings - start will be set when step is set # Quelch pyflakes warnings - start will be set when step is set
@ -1570,143 +1596,166 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
jscode, 'Initial JS player signature function name', group='sig') jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode) jsi = JSInterpreter(jscode)
initial_function = jsi.extract_function(funcname) initial_function = jsi.extract_function(funcname)
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _cached(self, func, *cache_id):
def inner(*args, **kwargs):
if cache_id not in self._player_cache:
try:
self._player_cache[cache_id] = func(*args, **kwargs)
except ExtractorError as e:
self._player_cache[cache_id] = e
except Exception as e:
self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
ret = self._player_cache[cache_id]
if isinstance(ret, Exception):
raise ret
return ret
return inner
def _decrypt_signature(self, s, video_id, player_url): def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature""" """Turn the encrypted s field into a working signature"""
extract_sig = self._cached(
if player_url is None: self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
raise ExtractorError('Cannot decrypt signature without player_url') func = extract_sig(video_id, player_url, s)
try:
player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
func = self._extract_signature_function(
video_id, player_url, s
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
if self._downloader.params.get('youtube_print_sig_code'):
self._print_sig_code(func, s) self._print_sig_code(func, s)
return func(s) return func(s)
except Exception as e:
tb = traceback.format_exc()
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
def _extract_player_url(self, webpage):
player_url = self._search_regex(
r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
webpage or '', 'player URL', fatal=False)
if not player_url:
return
if player_url.startswith('//'):
player_url = 'https:' + player_url
elif not re.match(r'https?://', player_url):
player_url = compat_urllib_parse.urljoin(
'https://www.youtube.com', player_url)
return player_url
# from yt-dlp # from yt-dlp
# See also: # See also:
# 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419 # 1. https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-894619419
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116 # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377 # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
def _extract_n_function_name(self, jscode): def _decrypt_nsig(self, n, video_id, player_url):
target = r'(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\[(?P<idx>\d+)\])?' """Turn the encrypted n field into a working signature"""
nfunc_and_idx = self._search_regex( if player_url is None:
r'\.get\("n"\)\)&&\(b=(%s)\([\w$]+\)' % (target, ), raise ExtractorError('Cannot decrypt nsig without player_url')
jscode, 'Initial JS player n function name')
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx:
return nfunc
VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]'
note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx)
def search_function_code(needle, group):
return self._search_regex(
VAR_RE_TMPL % (re.escape(nfunc), needle), jscode,
note.format(group), group=group)
if int_or_none(idx) == 0:
real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias')
if real_nfunc:
return real_nfunc
return self._parse_json(
search_function_code('.+?', group='name'),
nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self._downloader.cache.load('youtube-nsig', player_id)
if func_code:
jsi = JSInterpreter(func_code)
else:
jscode = self._get_player_code(video_id, player_url, player_id)
funcname = self._extract_n_function_name(jscode)
jsi = JSInterpreter(jscode)
func_code = jsi.extract_function_code(funcname)
self._downloader.cache.store('youtube-nsig', player_id, func_code)
if self._downloader.params.get('youtube_print_sig_code'):
self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(player_id, func_code[1]))
return lambda s: jsi.extract_function_from_code(*func_code)([s])
def _n_descramble(self, n_param, player_url, video_id):
"""Compute the response to YT's "n" parameter challenge,
or None
Args:
n_param -- challenge string that is the value of the
URL's "n" query parameter
player_url -- URL of YT player JS
video_id
"""
sig_id = ('nsig_value', n_param)
if sig_id in self._player_cache:
return self._player_cache[sig_id]
try: try:
player_id = ('nsig', player_url) jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
if player_id not in self._player_cache: except ExtractorError as e:
self._player_cache[player_id] = self._extract_n_function(video_id, player_url) raise ExtractorError('Unable to extract nsig function code', cause=e)
func = self._player_cache[player_id] if self.get_param('youtube_print_sig_code'):
ret = func(n_param) self.to_screen('Extracted nsig function from {0}:\n{1}\n'.format(
if ret.startswith('enhanced_except_'): player_id, func_code[1]))
raise ExtractorError('Unhandled exception in decode')
self._player_cache[sig_id] = ret try:
if self._downloader.params.get('verbose', False): extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id]))) ret = extract_nsig(jsi, func_code)(n)
return self._player_cache[sig_id] except JSInterpreter.Exception as e:
except Exception as e: self.report_warning(
self._downloader.report_warning( '%s (%s %s)' % (
'[%s] %s (%s %s)' % ( 'Unable to decode n-parameter: expect download to be blocked or throttled',
self.IE_NAME, error_to_compat_str(e),
'Unable to decode n-parameter: download likely to be throttled', traceback.format_exc()),
error_to_compat_str(e), video_id=video_id)
traceback.format_exc())) return
self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
return ret
def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
# or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x)
\((?:[\w$()\s]+,)*?\s* # (
(?P<b>[a-z])\s*=\s* # b=
(?:
(?: # expect ,c=a.get(b) (etc)
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
"n+"\[\s*\+?s*[\w$.]+\s*]
)\s*(?:,[\w$()\s]+(?=,))*|
(?P<old>[\w$]+) # a (old[er])
)\s*
(?(old)
# b.get("n")
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name:
self.report_warning('Falling back to generic n function search')
return self._search_regex(
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_
''', jscode, 'Initial JS player n function name', group='name')
if not idx:
return func_name
return self._parse_json(self._search_regex(
r'var\s+{0}\s*=\s*(\[.+?\])\s*[,;]'.format(re.escape(func_name)), jscode,
'Initial JS player n function list ({0}.{1})'.format(func_name, idx)),
func_name, transform_source=js_to_json)[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self.cache.load('youtube-nsig', player_id)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
if func_code:
return jsi, player_id, func_code
func_name = self._extract_n_function_name(jscode)
func_code = jsi.extract_function_code(func_name)
self.cache.store('youtube-nsig', player_id, func_code)
return jsi, player_id, func_code
def _extract_n_function_from_code(self, jsi, func_code):
func = jsi.extract_function_from_code(*func_code)
def extract_nsig(s):
try:
ret = func([s])
except JSInterpreter.Exception:
raise
except Exception as e:
raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
if ret.startswith('enhanced_except_'):
raise JSInterpreter.Exception('Signature function returned an exception')
return ret
return extract_nsig
def _unthrottle_format_urls(self, video_id, player_url, *formats):
def decrypt_nsig(n):
return self._cached(self._decrypt_nsig, 'nsig', n, player_url)
def _unthrottle_format_urls(self, video_id, player_url, formats):
for fmt in formats: for fmt in formats:
parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url']) parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
n_param = compat_parse_qs(parsed_fmt_url.query).get('n') n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
if not n_param: if not n_param:
continue continue
n_param = n_param[-1] n_param = n_param[-1]
n_response = self._n_descramble(n_param, player_url, video_id) n_response = decrypt_nsig(n_param)(n_param, video_id, player_url)
if n_response is None: if n_response is None:
# give up if descrambling failed # give up if descrambling failed
break break
for fmt_dct in traverse_obj(fmt, (None, (None, ('fragments', Ellipsis))), expected_type=dict): fmt['url'] = update_url_query(fmt['url'], {'n': n_response})
fmt_dct['url'] = update_url(
fmt_dct['url'], query_update={'n': [n_response]})
# from yt-dlp, with tweaks # from yt-dlp, with tweaks
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
@ -1714,16 +1763,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
Extract signatureTimestamp (sts) Extract signatureTimestamp (sts)
Required to tell API what sig/player version is in use. Required to tell API what sig/player version is in use.
""" """
sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None sts = traverse_obj(ytcfg, 'STS', expected_type=int)
if not sts: if not sts:
# Attempt to extract from player # Attempt to extract from player
if player_url is None: if player_url is None:
error_msg = 'Cannot extract signature timestamp without player_url.' error_msg = 'Cannot extract signature timestamp without player_url.'
if fatal: if fatal:
raise ExtractorError(error_msg) raise ExtractorError(error_msg)
self._downloader.report_warning(error_msg) self.report_warning(error_msg)
return return
code = self._get_player_code(video_id, player_url) code = self._load_player(video_id, player_url, fatal=fatal)
sts = int_or_none(self._search_regex( sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '', r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
'JS player signature timestamp', group='sts', fatal=fatal)) 'JS player signature timestamp', group='sts', fatal=fatal))
@ -1739,12 +1788,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# cpn generation algorithm is reverse engineered from base.js. # cpn generation algorithm is reverse engineered from base.js.
# In fact it works even with dummy cpn. # In fact it works even with dummy cpn.
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
playback_url = update_url( # more consistent results setting it to right before the end
playback_url, query_update={ qs = parse_qs(playback_url)
'ver': ['2'], video_length = '{0}'.format(float((qs.get('len') or ['1.5'])[0]) - 1)
'cpn': [cpn],
playback_url = update_url_query(
playback_url, {
'ver': '2',
'cpn': cpn,
'cmt': video_length,
'el': 'detailpage', # otherwise defaults to "shorts"
}) })
self._download_webpage( self._download_webpage(
@ -1992,8 +2047,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else: else:
self.to_screen('Downloading just video %s because of --no-playlist' % video_id) self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
if not player_url:
player_url = self._extract_player_url(webpage)
formats = [] formats = []
itags = [] itags = collections.defaultdict(set)
itag_qualities = {} itag_qualities = {}
q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres']) q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
CHUNK_SIZE = 10 << 20 CHUNK_SIZE = 10 << 20
@ -2009,58 +2067,92 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}) })
} for range_start in range(0, f['filesize'], CHUNK_SIZE)) } for range_start in range(0, f['filesize'], CHUNK_SIZE))
lower = lambda s: s.lower()
for fmt in streaming_formats: for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): if fmt.get('targetDurationSec'):
continue continue
itag = str_or_none(fmt.get('itag')) itag = str_or_none(fmt.get('itag'))
quality = fmt.get('quality') audio_track = traverse_obj(fmt, ('audioTrack', T(dict))) or {}
if itag and quality:
quality = traverse_obj(fmt, ((
# The 3gp format (17) in android client has a quality of "small",
# but is actually worse than other formats
T(lambda _: 'tiny' if itag == 17 else None),
('quality', T(lambda q: q if q and q != 'tiny' else None)),
('audioQuality', T(lower)),
'quality'), T(txt_or_none)), get_all=False)
if quality and itag:
itag_qualities[itag] = quality itag_qualities[itag] = quality
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the # (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragment that would subsequently requested with (`&sq=N`) # number of fragments that would subsequently be requested with (`&sq=N`)
if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF': if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
continue continue
fmt_url = fmt.get('url') fmt_url = fmt.get('url')
if not fmt_url: if not fmt_url:
sc = compat_parse_qs(fmt.get('signatureCipher')) sc = compat_parse_qs(fmt.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0])) fmt_url = traverse_obj(sc, ('url', -1, T(url_or_none)))
encrypted_sig = try_get(sc, lambda x: x['s'][0]) encrypted_sig = traverse_obj(sc, ('s', -1))
if not (sc and fmt_url and encrypted_sig): if not (fmt_url and encrypted_sig):
continue continue
if not player_url: player_url = player_url or self._extract_player_url(webpage)
player_url = self._extract_player_url(webpage)
if not player_url: if not player_url:
continue continue
signature = self._decrypt_signature(sc['s'][0], video_id, player_url) try:
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' fmt_url = update_url_query(fmt_url, {
fmt_url += '&' + sp + '=' + signature traverse_obj(sc, ('sp', -1)) or 'signature':
[self._decrypt_signature(encrypted_sig, video_id, player_url)],
})
except ExtractorError as e:
self.report_warning('Signature extraction failed: Some formats may be missing',
video_id=video_id, only_once=True)
self.write_debug(error_to_compat_str(e), only_once=True)
continue
if itag: language_preference = (
itags.append(itag) 10 if audio_track.get('audioIsDefault')
tbr = float_or_none( else -10 if 'descriptive' in (traverse_obj(audio_track, ('displayName', T(lower))) or '')
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) else -1)
name = (
traverse_obj(fmt, ('qualityLabel', T(txt_or_none)))
or quality.replace('audio_quality_', ''))
dct = { dct = {
'asr': int_or_none(fmt.get('audioSampleRate')), 'format_id': join_nonempty(itag, fmt.get('isDrc') and 'drc'),
'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag,
'format_note': fmt.get('qualityLabel') or quality,
'fps': int_or_none(fmt.get('fps')),
'height': int_or_none(fmt.get('height')),
'quality': q(quality),
'tbr': tbr,
'url': fmt_url, 'url': fmt_url,
'width': fmt.get('width'), # Format 22 is likely to be damaged: see https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': ((-5 if itag == '22' else -1)
+ (100 if 'Premium' in name else 0)),
'quality': q(quality),
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else '') or None,
'language_preference': language_preference,
# Strictly de-prioritize 3gp formats
'preference': -2 if itag == '17' else None,
} }
mimetype = fmt.get('mimeType') if itag:
if mimetype: itags[itag].add(('https', dct.get('language')))
mobj = re.match( self._unthrottle_format_urls(video_id, player_url, dct)
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype) dct.update(traverse_obj(fmt, {
if mobj: 'asr': ('audioSampleRate', T(int_or_none)),
dct['ext'] = mimetype2ext(mobj.group(1)) 'filesize': ('contentLength', T(int_or_none)),
dct.update(parse_codecs(mobj.group(2))) 'format_note': ('qualityLabel', T(lambda x: x or quality)),
# for some formats, fps is wrongly returned as 1
'fps': ('fps', T(int_or_none), T(lambda f: f if f > 1 else None)),
'audio_channels': ('audioChannels', T(int_or_none)),
'height': ('height', T(int_or_none)),
'has_drm': ('drmFamilies', T(bool)),
'tbr': (('averageBitrate', 'bitrate'), T(lambda t: float_or_none(t, 1000))),
'width': ('width', T(int_or_none)),
'_duration_ms': ('approxDurationMs', T(int_or_none)),
}, get_all=False))
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
if mime_mobj:
dct['ext'] = mimetype2ext(mime_mobj.group(1))
dct.update(parse_codecs(mime_mobj.group(2)))
single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec')) single_stream = 'none' in (dct.get(c) for c in ('acodec', 'vcodec'))
if single_stream and dct.get('ext'): if single_stream and dct.get('ext'):
dct['container'] = dct['ext'] + '_dash' dct['container'] = dct['ext'] + '_dash'
@ -2075,14 +2167,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
formats.append(dct) formats.append(dct)
def process_manifest_format(f, proto, client_name, itag, all_formats=False):
key = (proto, f.get('language'))
if not all_formats and key in itags[itag]:
return False
itags[itag].add(key)
if itag:
f['format_id'] = (
'{0}-{1}'.format(itag, proto)
if all_formats or any(p != proto for p, _ in itags[itag])
else itag)
if f.get('source_preference') is None:
f['source_preference'] = -1
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] += 100
f['quality'] = q(traverse_obj(f, (
'format_id', T(lambda s: itag_qualities[s.split('-')[0]])), default=-1))
if try_call(lambda: f['fps'] <= 1):
del f['fps']
if proto == 'hls' and f.get('has_drm'):
f['has_drm'] = 'maybe'
f['source_preference'] -= 5
return True
hls_manifest_url = streaming_data.get('hlsManifestUrl') hls_manifest_url = streaming_data.get('hlsManifestUrl')
if hls_manifest_url: if hls_manifest_url:
for f in self._extract_m3u8_formats( for f in self._extract_m3u8_formats(
hls_manifest_url, video_id, 'mp4', fatal=False): hls_manifest_url, video_id, 'mp4', fatal=False):
itag = self._search_regex( if process_manifest_format(
r'/itag/(\d+)', f['url'], 'itag', default=None) f, 'hls', None, self._search_regex(
if itag: r'/itag/(\d+)', f['url'], 'itag', default=None)):
f['format_id'] = itag
formats.append(f) formats.append(f)
if self._downloader.params.get('youtube_include_dash_manifest', True): if self._downloader.params.get('youtube_include_dash_manifest', True):
@ -2090,18 +2210,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if dash_manifest_url: if dash_manifest_url:
for f in self._extract_mpd_formats( for f in self._extract_mpd_formats(
dash_manifest_url, video_id, fatal=False): dash_manifest_url, video_id, fatal=False):
itag = f['format_id'] if process_manifest_format(
if itag in itags: f, 'dash', None, f['format_id']):
continue f['filesize'] = traverse_obj(f, (
if itag in itag_qualities: ('fragment_base_url', 'url'), T(lambda u: self._search_regex(
f['quality'] = q(itag_qualities[itag]) r'/clen/(\d+)', u, 'file size', default=None)),
filesize = int_or_none(self._search_regex( T(int_or_none)), get_all=False)
r'/clen/(\d+)', f.get('fragment_base_url')
or f['url'], 'file size', default=None))
if filesize:
f['filesize'] = filesize
formats.append(f) formats.append(f)
playable_formats = [f for f in formats if not f.get('has_drm')]
if formats and not playable_formats:
# If there are no formats that definitely don't have DRM, all have DRM
self.report_drm(video_id)
formats[:] = playable_formats
if not formats: if not formats:
if streaming_data.get('licenseInfos'): if streaming_data.get('licenseInfos'):
raise ExtractorError( raise ExtractorError(
@ -2172,6 +2294,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_details.get('lengthSeconds') video_details.get('lengthSeconds')
or microformat.get('lengthSeconds')) \ or microformat.get('lengthSeconds')) \
or parse_duration(search_meta('duration')) or parse_duration(search_meta('duration'))
for f in formats:
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# but avoid false positives with small duration differences.
# Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
if try_call(lambda x: float(x.pop('_duration_ms')) / duration < 500, args=(f,)):
self.report_warning(
'{0}: Some possibly damaged formats will be deprioritized'.format(video_id), only_once=True)
# Strictly de-prioritize damaged formats
f['preference'] = -10
is_live = video_details.get('isLive') is_live = video_details.get('isLive')
owner_profile_url = self._yt_urljoin(self._extract_author_var( owner_profile_url = self._yt_urljoin(self._extract_author_var(
@ -2180,10 +2313,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
uploader = self._extract_author_var( uploader = self._extract_author_var(
webpage, 'name', videodetails=video_details, metadata=microformat) webpage, 'name', videodetails=video_details, metadata=microformat)
if not player_url:
player_url = self._extract_player_url(webpage)
self._unthrottle_format_urls(video_id, player_url, formats)
info = { info = {
'id': video_id, 'id': video_id,
'title': self._live_title(video_title) if is_live else video_title, 'title': self._live_title(video_title) if is_live else video_title,
@ -2376,6 +2505,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': str_to_int(like_count), 'like_count': str_to_int(like_count),
'dislike_count': str_to_int(dislike_count), 'dislike_count': str_to_int(dislike_count),
}) })
else:
info['like_count'] = traverse_obj(vpir, (
'videoActions', 'menuRenderer', 'topLevelButtons', Ellipsis,
'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
'buttonViewModel', (('title', ('accessibilityText', T(lambda s: s.split()), Ellipsis))), T(parse_count)),
get_all=False)
vsir = content.get('videoSecondaryInfoRenderer') vsir = content.get('videoSecondaryInfoRenderer')
if vsir: if vsir:
rows = try_get( rows = try_get(
@ -2490,7 +2627,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists', 'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
'uploader': 'Igor Kleiner', 'uploader': 'Igor Kleiner',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
@ -2501,7 +2638,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner - Playlists', 'title': r're:Igor Kleiner(?: Ph\.D\.)? - Playlists',
'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
'uploader': 'Igor Kleiner', 'uploader': 'Igor Kleiner',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
@ -2613,7 +2750,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': { 'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels', 'title': r're:lex will - (?:Home|Channels)',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'uploader': 'lex will', 'uploader': 'lex will',
'uploader_id': '@lexwill718', 'uploader_id': '@lexwill718',

View File

@ -14,12 +14,15 @@ from .utils import (
remove_quotes, remove_quotes,
unified_timestamp, unified_timestamp,
variadic, variadic,
write_string,
) )
from .compat import ( from .compat import (
compat_basestring, compat_basestring,
compat_chr, compat_chr,
compat_collections_chain_map as ChainMap, compat_collections_chain_map as ChainMap,
compat_filter as filter,
compat_itertools_zip_longest as zip_longest, compat_itertools_zip_longest as zip_longest,
compat_map as map,
compat_str, compat_str,
) )
@ -53,15 +56,16 @@ def wraps_op(op):
# NB In principle NaN cannot be checked by membership. # NB In principle NaN cannot be checked by membership.
# Here all NaN values are actually this one, so _NaN is _NaN, # Here all NaN values are actually this one, so _NaN is _NaN,
# although _NaN != _NaN. # although _NaN != _NaN. Ditto Infinity.
_NaN = float('nan') _NaN = float('nan')
_Infinity = float('inf')
def _js_bit_op(op): def _js_bit_op(op):
def zeroise(x): def zeroise(x):
return 0 if x in (None, JS_Undefined, _NaN) else x return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
@wraps_op(op) @wraps_op(op)
def wrapped(a, b): def wrapped(a, b):
@ -84,7 +88,7 @@ def _js_arith_op(op):
def _js_div(a, b): def _js_div(a, b):
if JS_Undefined in (a, b) or not (a or b): if JS_Undefined in (a, b) or not (a or b):
return _NaN return _NaN
return operator.truediv(a or 0, b) if b else float('inf') return operator.truediv(a or 0, b) if b else _Infinity
def _js_mod(a, b): def _js_mod(a, b):
@ -220,6 +224,42 @@ class LocalNameSpace(ChainMap):
return 'LocalNameSpace%s' % (self.maps, ) return 'LocalNameSpace%s' % (self.maps, )
class Debugger(object):
ENABLED = False
@staticmethod
def write(*args, **kwargs):
level = kwargs.get('level', 100)
def truncate_string(s, left, right=0):
if s is None or len(s) <= left + right:
return s
return '...'.join((s[:left - 3], s[-right:] if right else ''))
write_string('[debug] JS: {0}{1}\n'.format(
' ' * (100 - level),
' '.join(truncate_string(compat_str(x), 50, 50) for x in args)))
@classmethod
def wrap_interpreter(cls, f):
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
if cls.ENABLED and stmt.strip():
cls.write(stmt, level=allow_recursion)
try:
ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
except Exception as e:
if cls.ENABLED:
if isinstance(e, ExtractorError):
e = e.orig_msg
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
raise
if cls.ENABLED and stmt.strip():
if should_ret or repr(ret) != stmt:
cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
return ret, should_ret
return interpret_statement
class JSInterpreter(object): class JSInterpreter(object):
__named_object_counter = 0 __named_object_counter = 0
@ -307,8 +347,7 @@ class JSInterpreter(object):
def __op_chars(cls): def __op_chars(cls):
op_chars = set(';,[') op_chars = set(';,[')
for op in cls._all_operators(): for op in cls._all_operators():
for c in op[0]: op_chars.update(op[0])
op_chars.add(c)
return op_chars return op_chars
def _named_object(self, namespace, obj): def _named_object(self, namespace, obj):
@ -326,9 +365,10 @@ class JSInterpreter(object):
# collections.Counter() is ~10% slower in both 2.7 and 3.9 # collections.Counter() is ~10% slower in both 2.7 and 3.9
counters = dict((k, 0) for k in _MATCHING_PARENS.values()) counters = dict((k, 0) for k in _MATCHING_PARENS.values())
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping, skipping = None, False, 0 in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
after_op, in_regex_char_group = True, False skipping = 0
if skip_delims:
skip_delims = variadic(skip_delims)
for idx, char in enumerate(expr): for idx, char in enumerate(expr):
paren_delta = 0 paren_delta = 0
if not in_quote: if not in_quote:
@ -355,7 +395,7 @@ class JSInterpreter(object):
continue continue
elif pos == 0 and skip_delims: elif pos == 0 and skip_delims:
here = expr[idx:] here = expr[idx:]
for s in variadic(skip_delims): for s in skip_delims:
if here.startswith(s) and s: if here.startswith(s) and s:
skipping = len(s) - 1 skipping = len(s) - 1
break break
@ -376,16 +416,17 @@ class JSInterpreter(object):
if delim is None: if delim is None:
delim = expr and _MATCHING_PARENS[expr[0]] delim = expr and _MATCHING_PARENS[expr[0]]
separated = list(cls._separate(expr, delim, 1)) separated = list(cls._separate(expr, delim, 1))
if len(separated) < 2: if len(separated) < 2:
raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals())) raise cls.Exception('No terminating paren {delim} in {expr!r:.5500}'.format(**locals()))
return separated[0][1:].strip(), separated[1].strip() return separated[0][1:].strip(), separated[1].strip()
@staticmethod @staticmethod
def _all_operators(): def _all_operators(_cached=[]):
return itertools.chain( if not _cached:
_cached.extend(itertools.chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS) _SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
return _cached
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
if op in ('||', '&&'): if op in ('||', '&&'):
@ -416,7 +457,7 @@ class JSInterpreter(object):
except Exception as e: except Exception as e:
if allow_undefined: if allow_undefined:
return JS_Undefined return JS_Undefined
raise self.Exception('Cannot get index {idx:.100}'.format(**locals()), expr=repr(obj), cause=e) raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace): def _dump(self, obj, namespace):
try: try:
@ -438,6 +479,7 @@ class JSInterpreter(object):
_FINALLY_RE = re.compile(r'finally\s*\{') _FINALLY_RE = re.compile(r'finally\s*\{')
_SWITCH_RE = re.compile(r'switch\s*\(') _SWITCH_RE = re.compile(r'switch\s*\(')
@Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100): def interpret_statement(self, stmt, local_vars, allow_recursion=100):
if allow_recursion < 0: if allow_recursion < 0:
raise self.Exception('Recursion limit reached') raise self.Exception('Recursion limit reached')
@ -448,6 +490,7 @@ class JSInterpreter(object):
# fails on (eg) if (...) stmt1; else stmt2; # fails on (eg) if (...) stmt1; else stmt2;
sub_statements = list(self._separate(stmt, ';')) or [''] sub_statements = list(self._separate(stmt, ';')) or ['']
expr = stmt = sub_statements.pop().strip() expr = stmt = sub_statements.pop().strip()
for sub_stmt in sub_statements: for sub_stmt in sub_statements:
ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion) ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
if should_return: if should_return:
@ -511,7 +554,6 @@ class JSInterpreter(object):
expr = self._dump(inner, local_vars) + outer expr = self._dump(inner, local_vars) + outer
if expr.startswith('('): if expr.startswith('('):
m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr) m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
if m: if m:
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig` # short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
@ -588,8 +630,7 @@ class JSInterpreter(object):
if m.group('err'): if m.group('err'):
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
catch_vars = local_vars.new_child(m=catch_vars) catch_vars = local_vars.new_child(m=catch_vars)
err = None err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
m = self._FINALLY_RE.match(expr) m = self._FINALLY_RE.match(expr)
if m: if m:
@ -693,7 +734,7 @@ class JSInterpreter(object):
(?P<op>{_OPERATOR_RE})? (?P<op>{_OPERATOR_RE})?
=(?!=)(?P<expr>.*)$ =(?!=)(?P<expr>.*)$
)|(?P<return> )|(?P<return>
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$ (?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
)|(?P<indexing> )|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$ (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
)|(?P<attribute> )|(?P<attribute>
@ -727,11 +768,12 @@ class JSInterpreter(object):
raise JS_Break() raise JS_Break()
elif expr == 'continue': elif expr == 'continue':
raise JS_Continue() raise JS_Continue()
elif expr == 'undefined': elif expr == 'undefined':
return JS_Undefined, should_return return JS_Undefined, should_return
elif expr == 'NaN': elif expr == 'NaN':
return _NaN, should_return return _NaN, should_return
elif expr == 'Infinity':
return _Infinity, should_return
elif md.get('return'): elif md.get('return'):
return local_vars[m.group('name')], should_return return local_vars[m.group('name')], should_return
@ -760,17 +802,30 @@ class JSInterpreter(object):
right_expr = separated.pop() right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS # handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'): if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0 undone = 0
while len(separated) > 1 and not separated[-1].strip(): separated = [s.strip() for s in separated]
while len(separated) > 1 and not separated[-1]:
undone += 1 undone += 1
separated.pop() separated.pop()
if op == '-' and undone % 2 != 0: if op == '-' and undone % 2 != 0:
right_expr = op + right_expr right_expr = op + right_expr
left_val = separated[-1] elif op == '+':
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
right_expr = separated.pop() + right_expr
if separated[-1][-1:] in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1] if separated else ''
for dm_op in ('*', '%', '/', '**'): for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim)) bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip(): if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr expr = op.join(separated) + op + right_expr
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None right_expr = None
break break
if right_expr is None: if right_expr is None:
@ -795,12 +850,15 @@ class JSInterpreter(object):
memb = member memb = member
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr) raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
def eval_method(): def eval_method(variable, member):
if (variable, member) == ('console', 'debug'): if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED:
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
return return
types = { types = {
'String': compat_str, 'String': compat_str,
'Math': float, 'Math': float,
'Array': list,
} }
obj = local_vars.get(variable) obj = local_vars.get(variable)
if obj in (JS_Undefined, None): if obj in (JS_Undefined, None):
@ -826,12 +884,29 @@ class JSInterpreter(object):
self.interpret_expression(v, local_vars, allow_recursion) self.interpret_expression(v, local_vars, allow_recursion)
for v in self._separate(arg_str)] for v in self._separate(arg_str)]
if obj == compat_str: # Fixup prototype call
if isinstance(obj, type):
new_member, rest = member.partition('.')[0::2]
if new_member == 'prototype':
new_member, func_prototype = rest.partition('.')[0::2]
assertion(argvals, 'takes one or more arguments')
assertion(isinstance(argvals[0], obj), 'must bind to type {0}'.format(obj))
if func_prototype == 'call':
obj = argvals.pop(0)
elif func_prototype == 'apply':
assertion(len(argvals) == 2, 'takes two arguments')
obj, argvals = argvals
assertion(isinstance(argvals, list), 'second argument must be a list')
else:
raise self.Exception('Unsupported Function method ' + func_prototype, expr)
member = new_member
if obj is compat_str:
if member == 'fromCharCode': if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')
return ''.join(map(compat_chr, argvals)) return ''.join(map(compat_chr, argvals))
raise self.Exception('Unsupported string method ' + member, expr=expr) raise self.Exception('Unsupported string method ' + member, expr=expr)
elif obj == float: elif obj is float:
if member == 'pow': if member == 'pow':
assertion(len(argvals) == 2, 'takes two arguments') assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1] return argvals[0] ** argvals[1]
@ -850,18 +925,25 @@ class JSInterpreter(object):
obj.reverse() obj.reverse()
return obj return obj
elif member == 'slice': elif member == 'slice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
assertion(len(argvals) == 1, 'takes exactly one argument') # From [1]:
return obj[argvals[0]:] # .slice() - like [:]
# .slice(n) - like [n:] (not [slice(n)]
# .slice(m, n) - like [m:n] or [slice(m, n)]
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
if len(argvals) < 2:
argvals += (None,)
return obj[slice(*argvals)]
elif member == 'splice': elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')
index, howMany = map(int, (argvals + [len(obj)])[:2]) index, how_many = map(int, (argvals + [len(obj)])[:2])
if index < 0: if index < 0:
index += len(obj) index += len(obj)
add_items = argvals[2:] add_items = argvals[2:]
res = [] res = []
for i in range(index, min(index + howMany, len(obj))): for _ in range(index, min(index + how_many, len(obj))):
res.append(obj.pop(index)) res.append(obj.pop(index))
for i, item in enumerate(add_items): for i, item in enumerate(add_items):
obj.insert(index + i, item) obj.insert(index + i, item)
@ -919,11 +1001,11 @@ class JSInterpreter(object):
if remaining: if remaining:
ret, should_abort = self.interpret_statement( ret, should_abort = self.interpret_statement(
self._named_object(local_vars, eval_method()) + remaining, self._named_object(local_vars, eval_method(variable, member)) + remaining,
local_vars, allow_recursion) local_vars, allow_recursion)
return ret, should_return or should_abort return ret, should_return or should_abort
else: else:
return eval_method(), should_return return eval_method(variable, member), should_return
elif md.get('function'): elif md.get('function'):
fname = m.group('fname') fname = m.group('fname')
@ -951,28 +1033,25 @@ class JSInterpreter(object):
def extract_object(self, objname): def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
obj = {} obj = {}
fields = None fields = next(filter(None, (
for obj_m in re.finditer( obj_m.group('fields') for obj_m in re.finditer(
r'''(?xs) r'''(?xs)
{0}\s*\.\s*{1}|{1}\s*=\s*\{{\s* {0}\s*\.\s*{1}|{1}\s*=\s*\{{\s*
(?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*) (?P<fields>({2}\s*:\s*function\s*\(.*?\)\s*\{{.*?}}(?:,\s*)?)*)
}}\s*; }}\s*;
'''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE), '''.format(_NAME_RE, re.escape(objname), _FUNC_NAME_RE),
self.code): self.code))), None)
fields = obj_m.group('fields') if not fields:
if fields:
break
else:
raise self.Exception('Could not find object ' + objname) raise self.Exception('Could not find object ' + objname)
# Currently, it only supports function definitions # Currently, it only supports function definitions
fields_m = re.finditer( for f in re.finditer(
r'''(?x) r'''(?x)
(?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)} (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
''' % (_FUNC_NAME_RE, _NAME_RE), ''' % (_FUNC_NAME_RE, _NAME_RE),
fields) fields):
for f in fields_m:
argnames = self.build_arglist(f.group('args')) argnames = self.build_arglist(f.group('args'))
obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code')) name = remove_quotes(f.group('key'))
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
return obj return obj
@ -1007,7 +1086,7 @@ class JSInterpreter(object):
def extract_function(self, funcname): def extract_function(self, funcname):
return function_with_repr( return function_with_repr(
self.extract_function_from_code(*self.extract_function_code(funcname)), self.extract_function_from_code(*self.extract_function_code(funcname)),
'F<%s>' % (funcname, )) 'F<%s>' % (funcname,))
def extract_function_from_code(self, argnames, code, *global_stack): def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {} local_vars = {}
@ -1016,7 +1095,7 @@ class JSInterpreter(object):
if mobj is None: if mobj is None:
break break
start, body_start = mobj.span() start, body_start = mobj.span()
body, remaining = self._separate_at_paren(code[body_start - 1:], '}') body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(local_vars, self.extract_function_from_code( name = self._named_object(local_vars, self.extract_function_from_code(
[x.strip() for x in mobj.group('args').split(',')], [x.strip() for x in mobj.group('args').split(',')],
body, local_vars, *global_stack)) body, local_vars, *global_stack))
@ -1044,8 +1123,7 @@ class JSInterpreter(object):
argnames = tuple(argnames) argnames = tuple(argnames)
def resf(args, kwargs={}, allow_recursion=100): def resf(args, kwargs={}, allow_recursion=100):
global_stack[0].update( global_stack[0].update(zip_longest(argnames, args, fillvalue=None))
zip_longest(argnames, args, fillvalue=None))
global_stack[0].update(kwargs) global_stack[0].update(kwargs)
var_stack = LocalNameSpace(*global_stack) var_stack = LocalNameSpace(*global_stack)
ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1) ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)

View File

@ -533,6 +533,10 @@ def parseOpts(overrideArguments=None):
'--no-check-certificate', '--no-check-certificate',
action='store_true', dest='no_check_certificate', default=False, action='store_true', dest='no_check_certificate', default=False,
help='Suppress HTTPS certificate validation') help='Suppress HTTPS certificate validation')
workarounds.add_option(
'--no-check-extensions',
action='store_true', dest='no_check_extensions', default=False,
help='Suppress file extension validation')
workarounds.add_option( workarounds.add_option(
'--prefer-insecure', '--prefer-insecure',
'--prefer-unsecure', action='store_true', dest='prefer_insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure',

View File

@ -74,8 +74,11 @@ class FFmpegPostProcessor(PostProcessor):
return FFmpegPostProcessor(downloader)._versions return FFmpegPostProcessor(downloader)._versions
def _determine_executables(self): def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] # ordered to match prefer_ffmpeg!
convs = ['ffmpeg', 'avconv']
probes = ['ffprobe', 'avprobe']
prefer_ffmpeg = True prefer_ffmpeg = True
programs = convs + probes
def get_ffmpeg_version(path): def get_ffmpeg_version(path):
ver = get_exe_version(path, args=['-version']) ver = get_exe_version(path, args=['-version'])
@ -96,6 +99,7 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = None self._paths = None
self._versions = None self._versions = None
location = None
if self._downloader: if self._downloader:
prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True) prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
location = self._downloader.params.get('ffmpeg_location') location = self._downloader.params.get('ffmpeg_location')
@ -118,33 +122,21 @@ class FFmpegPostProcessor(PostProcessor):
location = os.path.dirname(os.path.abspath(location)) location = os.path.dirname(os.path.abspath(location))
if basename in ('ffmpeg', 'ffprobe'): if basename in ('ffmpeg', 'ffprobe'):
prefer_ffmpeg = True prefer_ffmpeg = True
self._paths = dict( self._paths = dict(
(p, os.path.join(location, p)) for p in programs) (p, p if location is None else os.path.join(location, p))
for p in programs)
self._versions = dict( self._versions = dict(
x for x in (
(p, get_ffmpeg_version(self._paths[p])) for p in programs) (p, get_ffmpeg_version(self._paths[p])) for p in programs)
if self._versions is None: if x[1] is not None)
self._versions = dict(
(p, get_ffmpeg_version(p)) for p in programs)
self._paths = dict((p, p) for p in programs)
if prefer_ffmpeg is False: basenames = [None, None]
prefs = ('avconv', 'ffmpeg') for i, progs in enumerate((convs, probes)):
else: for p in progs[::-1 if prefer_ffmpeg is False else 1]:
prefs = ('ffmpeg', 'avconv') if self._versions.get(p):
for p in prefs: basenames[i] = p
if self._versions[p]:
self.basename = p
break
if prefer_ffmpeg is False:
prefs = ('avprobe', 'ffprobe')
else:
prefs = ('ffprobe', 'avprobe')
for p in prefs:
if self._versions[p]:
self.probe_basename = p
break break
self.basename, self.probe_basename = basenames
@property @property
def available(self): def available(self):

10
youtube_dl/traversal.py Normal file
View File

@ -0,0 +1,10 @@
# coding: utf-8
# TODO: move these utils.fns here and move import to utils
# flake8: noqa
from .utils import (
dict_get,
get_first,
T,
traverse_obj,
)

View File

@ -45,14 +45,18 @@ from .compat import (
compat_casefold, compat_casefold,
compat_chr, compat_chr,
compat_collections_abc, compat_collections_abc,
compat_contextlib_suppress,
compat_cookiejar, compat_cookiejar,
compat_ctypes_WINFUNCTYPE, compat_ctypes_WINFUNCTYPE,
compat_datetime_timedelta_total_seconds, compat_datetime_timedelta_total_seconds,
compat_etree_Element,
compat_etree_fromstring, compat_etree_fromstring,
compat_etree_iterfind,
compat_expanduser, compat_expanduser,
compat_html_entities, compat_html_entities,
compat_html_entities_html5, compat_html_entities_html5,
compat_http_client, compat_http_client,
compat_http_cookies,
compat_integer_types, compat_integer_types,
compat_kwargs, compat_kwargs,
compat_ncompress as ncompress, compat_ncompress as ncompress,
@ -1713,21 +1717,6 @@ TIMEZONE_NAMES = {
'PST': -8, 'PDT': -7 # Pacific 'PST': -8, 'PDT': -7 # Pacific
} }
KNOWN_EXTENSIONS = (
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
'flv', 'f4v', 'f4a', 'f4b',
'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
'mkv', 'mka', 'mk3d',
'avi', 'divx',
'mov',
'asf', 'wmv', 'wma',
'3gp', '3g2',
'mp3',
'flac',
'ape',
'wav',
'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode # needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'], itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
@ -1855,25 +1844,18 @@ def write_json_file(obj, fn):
try: try:
with tf: with tf:
json.dump(obj, tf) json.dump(obj, tf)
with compat_contextlib_suppress(OSError):
if sys.platform == 'win32': if sys.platform == 'win32':
# Need to remove existing file on Windows, else os.rename raises # Need to remove existing file on Windows, else os.rename raises
# WindowsError or FileExistsError. # WindowsError or FileExistsError.
try:
os.unlink(fn) os.unlink(fn)
except OSError:
pass
try:
mask = os.umask(0) mask = os.umask(0)
os.umask(mask) os.umask(mask)
os.chmod(tf.name, 0o666 & ~mask) os.chmod(tf.name, 0o666 & ~mask)
except OSError:
pass
os.rename(tf.name, fn) os.rename(tf.name, fn)
except Exception: except Exception:
try: with compat_contextlib_suppress(OSError):
os.remove(tf.name) os.remove(tf.name)
except OSError:
pass
raise raise
@ -2033,14 +2015,13 @@ def extract_attributes(html_element):
NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
""" """
parser = HTMLAttributeParser() ret = None
try: # Older Python may throw HTMLParseError in case of malformed HTML (and on .close()!)
with compat_contextlib_suppress(compat_HTMLParseError):
with contextlib.closing(HTMLAttributeParser()) as parser:
parser.feed(html_element) parser.feed(html_element)
parser.close() ret = parser.attrs
# Older Python may throw HTMLParseError in case of malformed HTML return ret or {}
except compat_HTMLParseError:
pass
return parser.attrs
def clean_html(html): def clean_html(html):
@ -2121,7 +2102,8 @@ def sanitize_filename(s, restricted=False, is_id=False):
if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()): if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
return '_' return '_'
if restricted and ord(char) > 127: if restricted and ord(char) > 127:
return '_' return '' if unicodedata.category(char)[0] in 'CM' else '_'
return char return char
# Replace look-alike Unicode glyphs # Replace look-alike Unicode glyphs
@ -2181,8 +2163,28 @@ def sanitize_url(url):
return url return url
def extract_basic_auth(url):
parts = compat_urllib_parse.urlsplit(url)
if parts.username is None:
return url, None
url = compat_urllib_parse.urlunsplit(parts._replace(netloc=(
parts.hostname if parts.port is None
else '%s:%d' % (parts.hostname, parts.port))))
auth_payload = base64.b64encode(
('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
return url, 'Basic {0}'.format(auth_payload.decode('ascii'))
def sanitized_Request(url, *args, **kwargs): def sanitized_Request(url, *args, **kwargs):
return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs) url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
if auth_header is not None:
headers = args[1] if len(args) > 1 else kwargs.get('headers')
headers = headers or {}
headers['Authorization'] = auth_header
if len(args) <= 1 and kwargs.get('headers') is None:
kwargs['headers'] = headers
kwargs = compat_kwargs(kwargs)
return compat_urllib_request.Request(url, *args, **kwargs)
def expand_path(s): def expand_path(s):
@ -2220,7 +2222,8 @@ def _htmlentity_transform(entity_with_semicolon):
numstr = '0%s' % numstr numstr = '0%s' % numstr
else: else:
base = 10 base = 10
# See https://github.com/ytdl-org/youtube-dl/issues/7518 # See https://github.com/ytdl-org/youtube-dl/issues/7518\
# Also, weirdly, compat_contextlib_suppress fails here in 2.6
try: try:
return compat_chr(int(numstr, base)) return compat_chr(int(numstr, base))
except ValueError: except ValueError:
@ -2327,11 +2330,9 @@ def make_HTTPS_handler(params, **kwargs):
# Some servers may (wrongly) reject requests if ALPN extension is not sent. See: # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
# https://github.com/python/cpython/issues/85140 # https://github.com/python/cpython/issues/85140
# https://github.com/yt-dlp/yt-dlp/issues/3878 # https://github.com/yt-dlp/yt-dlp/issues/3878
try: with compat_contextlib_suppress(AttributeError, NotImplementedError):
# fails for Python < 2.7.10, not ssl.HAS_ALPN
ctx.set_alpn_protocols(ALPN_PROTOCOLS) ctx.set_alpn_protocols(ALPN_PROTOCOLS)
except (AttributeError, NotImplementedError):
# Python < 2.7.10, not ssl.HAS_ALPN
pass
opts_no_check_certificate = params.get('nocheckcertificate', False) opts_no_check_certificate = params.get('nocheckcertificate', False)
if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
@ -2341,12 +2342,10 @@ def make_HTTPS_handler(params, **kwargs):
context.check_hostname = False context.check_hostname = False
context.verify_mode = ssl.CERT_NONE context.verify_mode = ssl.CERT_NONE
try: with compat_contextlib_suppress(TypeError):
# Fails with Python 2.7.8 (create_default_context present
# but HTTPSHandler has no context=)
return YoutubeDLHTTPSHandler(params, context=context, **kwargs) return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
except TypeError:
# Python 2.7.8
# (create_default_context present but HTTPSHandler has no context=)
pass
if sys.version_info < (3, 2): if sys.version_info < (3, 2):
return YoutubeDLHTTPSHandler(params, **kwargs) return YoutubeDLHTTPSHandler(params, **kwargs)
@ -2360,15 +2359,24 @@ def make_HTTPS_handler(params, **kwargs):
return YoutubeDLHTTPSHandler(params, context=context, **kwargs) return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
def bug_reports_message(): def bug_reports_message(before=';'):
if ytdl_is_updateable(): if ytdl_is_updateable():
update_cmd = 'type youtube-dl -U to update' update_cmd = 'type youtube-dl -U to update'
else: else:
update_cmd = 'see https://yt-dl.org/update on how to update' update_cmd = 'see https://github.com/ytdl-org/youtube-dl/#user-content-installation on how to update'
msg = '; please report this issue on https://yt-dl.org/bug .'
msg += ' Make sure you are using the latest version; %s.' % update_cmd msg = (
msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 'please report this issue on https://github.com/ytdl-org/youtube-dl/issues ,'
return msg ' using the appropriate issue template.'
' Make sure you are using the latest version; %s.'
' Be sure to call youtube-dl with the --verbose option and include the complete output.'
) % update_cmd
before = (before or '').rstrip()
if not before or before.endswith(('.', '!', '?')):
msg = msg[0].title() + msg[1:]
return (before + ' ' if before else '') + msg
class YoutubeDLError(Exception): class YoutubeDLError(Exception):
@ -2383,7 +2391,7 @@ class ExtractorError(YoutubeDLError):
""" tb, if given, is the original traceback (so that it can be printed out). """ tb, if given, is the original traceback (so that it can be printed out).
If expected is set, this is a normal error message and most likely not a bug in youtube-dl. If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
""" """
self.orig_msg = msg
if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError): if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
expected = True expected = True
if video_id is not None: if video_id is not None:
@ -3155,12 +3163,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
if timezone is None: if timezone is None:
timezone, date_str = extract_timezone(date_str) timezone, date_str = extract_timezone(date_str)
try: with compat_contextlib_suppress(ValueError):
date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
dt = datetime.datetime.strptime(date_str, date_format) - timezone dt = datetime.datetime.strptime(date_str, date_format) - timezone
return calendar.timegm(dt.timetuple()) return calendar.timegm(dt.timetuple())
except ValueError:
pass
def date_formats(day_first=True): def date_formats(day_first=True):
@ -3180,17 +3186,13 @@ def unified_strdate(date_str, day_first=True):
_, date_str = extract_timezone(date_str) _, date_str = extract_timezone(date_str)
for expression in date_formats(day_first): for expression in date_formats(day_first):
try: with compat_contextlib_suppress(ValueError):
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
except ValueError:
pass
if upload_date is None: if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str) timetuple = email.utils.parsedate_tz(date_str)
if timetuple: if timetuple:
try: with compat_contextlib_suppress(ValueError):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
except ValueError:
pass
if upload_date is not None: if upload_date is not None:
return compat_str(upload_date) return compat_str(upload_date)
@ -3219,11 +3221,9 @@ def unified_timestamp(date_str, day_first=True):
date_str = m.group(1) date_str = m.group(1)
for expression in date_formats(day_first): for expression in date_formats(day_first):
try: with compat_contextlib_suppress(ValueError):
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
return calendar.timegm(dt.timetuple()) return calendar.timegm(dt.timetuple())
except ValueError:
pass
timetuple = email.utils.parsedate_tz(date_str) timetuple = email.utils.parsedate_tz(date_str)
if timetuple: if timetuple:
return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone) return calendar.timegm(timetuple) + pm_delta * 3600 - compat_datetime_timedelta_total_seconds(timezone)
@ -3831,14 +3831,15 @@ class PUTRequest(compat_urllib_request.Request):
return 'PUT' return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
if get_attr: if get_attr:
if v is not None: if v is not None:
v = getattr(v, get_attr, None) v = getattr(v, get_attr, None)
if v in (None, ''): if v in (None, ''):
return default return default
try: try:
return int(v) * invscale // scale # like int, raise if base is specified and v is not a string
return (int(v) if base is None else int(v, base=base)) * invscale // scale
except (ValueError, TypeError, OverflowError): except (ValueError, TypeError, OverflowError):
return default return default
@ -3943,19 +3944,22 @@ def parse_duration(s):
return duration return duration
def prepend_extension(filename, ext, expected_real_ext=None): def _change_extension(prepend, filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename) name, real_ext = os.path.splitext(filename)
return ( sanitize_extension = _UnsafeExtensionError.sanitize_extension
'{0}.{1}{2}'.format(name, ext, real_ext)
if not expected_real_ext or real_ext[1:] == expected_real_ext if not expected_real_ext or real_ext.partition('.')[0::2] == ('', expected_real_ext):
else '{0}.{1}'.format(filename, ext)) filename = name
if prepend and real_ext:
sanitize_extension(ext, prepend=prepend)
return ''.join((filename, '.', ext, real_ext))
# Mitigate path traversal and file impersonation attacks
return '.'.join((filename, sanitize_extension(ext)))
def replace_extension(filename, ext, expected_real_ext=None): prepend_extension = functools.partial(_change_extension, True)
name, real_ext = os.path.splitext(filename) replace_extension = functools.partial(_change_extension, False)
return '{0}.{1}'.format(
name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
ext)
def check_executable(exe, args=[]): def check_executable(exe, args=[]):
@ -6240,15 +6244,16 @@ if __debug__:
def traverse_obj(obj, *paths, **kwargs): def traverse_obj(obj, *paths, **kwargs):
""" """
Safely traverse nested `dict`s and `Iterable`s Safely traverse nested `dict`s and `Iterable`s, etc
>>> obj = [{}, {"key": "value"}] >>> obj = [{}, {"key": "value"}]
>>> traverse_obj(obj, (1, "key")) >>> traverse_obj(obj, (1, "key"))
"value" 'value'
Each of the provided `paths` is tested and the first producing a valid result will be returned. Each of the provided `paths` is tested and the first producing a valid result will be returned.
The next path will also be tested if the path branched but no results could be found. The next path will also be tested if the path branched but no results could be found.
Supported values for traversal are `Mapping`, `Iterable` and `re.Match`. Supported values for traversal are `Mapping`, `Iterable`, `re.Match`, `xml.etree.ElementTree`
(xpath) and `http.cookies.Morsel`.
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded. Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
@ -6256,8 +6261,9 @@ def traverse_obj(obj, *paths, **kwargs):
The keys in the path can be one of: The keys in the path can be one of:
- `None`: Return the current object. - `None`: Return the current object.
- `set`: Requires the only item in the set to be a type or function, - `set`: Requires the only item in the set to be a type or function,
like `{type}`/`{func}`. If a `type`, returns only values like `{type}`/`{type, type, ...}`/`{func}`. If one or more `type`s,
of this type. If a function, returns `func(obj)`. return only values that have one of the types. If a function,
return `func(obj)`.
- `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`. - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
- `slice`: Branch out and return all values in `obj[key]`. - `slice`: Branch out and return all values in `obj[key]`.
- `Ellipsis`: Branch out and return a list of all values. - `Ellipsis`: Branch out and return a list of all values.
@ -6269,8 +6275,10 @@ def traverse_obj(obj, *paths, **kwargs):
For `Iterable`s, `key` is the enumeration count of the value. For `Iterable`s, `key` is the enumeration count of the value.
For `re.Match`es, `key` is the group number (0 = full match) For `re.Match`es, `key` is the group number (0 = full match)
as well as additionally any group names, if given. as well as additionally any group names, if given.
- `dict` Transform the current object and return a matching dict. - `dict`: Transform the current object and return a matching dict.
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
- `any`-builtin: Take the first matching object and return it, resetting branching.
- `all`-builtin: Take all matching objects and return them as a list, resetting branching.
`tuple`, `list`, and `dict` all support nested paths and branches. `tuple`, `list`, and `dict` all support nested paths and branches.
@ -6286,10 +6294,8 @@ def traverse_obj(obj, *paths, **kwargs):
@param get_all If `False`, return the first matching result, otherwise all matching ones. @param get_all If `False`, return the first matching result, otherwise all matching ones.
@param casesense If `False`, consider string dictionary keys as case insensitive. @param casesense If `False`, consider string dictionary keys as case insensitive.
The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API The following is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API
@param _is_user_input Whether the keys are generated from user input.
If `True` strings get converted to `int`/`slice` if needed.
@param _traverse_string Whether to traverse into objects as strings. @param _traverse_string Whether to traverse into objects as strings.
If `True`, any non-compatible object will first be If `True`, any non-compatible object will first be
converted into a string and then traversed into. converted into a string and then traversed into.
@ -6309,7 +6315,6 @@ def traverse_obj(obj, *paths, **kwargs):
expected_type = kwargs.get('expected_type') expected_type = kwargs.get('expected_type')
get_all = kwargs.get('get_all', True) get_all = kwargs.get('get_all', True)
casesense = kwargs.get('casesense', True) casesense = kwargs.get('casesense', True)
_is_user_input = kwargs.get('_is_user_input', False)
_traverse_string = kwargs.get('_traverse_string', False) _traverse_string = kwargs.get('_traverse_string', False)
# instant compat # instant compat
@ -6323,10 +6328,8 @@ def traverse_obj(obj, *paths, **kwargs):
type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,)) type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
def lookup_or_none(v, k, getter=None): def lookup_or_none(v, k, getter=None):
try: with compat_contextlib_suppress(LookupError):
return getter(v, k) if getter else v[k] return getter(v, k) if getter else v[k]
except IndexError:
return None
def from_iterable(iterables): def from_iterable(iterables):
# chain.from_iterable(['ABC', 'DEF']) --> A B C D E F # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
@ -6348,12 +6351,13 @@ def traverse_obj(obj, *paths, **kwargs):
result = obj result = obj
elif isinstance(key, set): elif isinstance(key, set):
assert len(key) == 1, 'Set should only be used to wrap a single item' assert len(key) >= 1, 'At least one item is required in a `set` key'
item = next(iter(key)) if all(isinstance(item, type) for item in key):
if isinstance(item, type): result = obj if isinstance(obj, tuple(key)) else None
result = obj if isinstance(obj, item) else None
else: else:
result = try_call(item, args=(obj,)) item = next(iter(key))
assert len(key) == 1, 'Multiple items in a `set` key must all be types'
result = try_call(item, args=(obj,)) if not isinstance(item, type) else None
elif isinstance(key, (list, tuple)): elif isinstance(key, (list, tuple)):
branching = True branching = True
@ -6362,9 +6366,11 @@ def traverse_obj(obj, *paths, **kwargs):
elif key is Ellipsis: elif key is Ellipsis:
branching = True branching = True
if isinstance(obj, compat_http_cookies.Morsel):
obj = dict(obj, key=obj.key, value=obj.value)
if isinstance(obj, compat_collections_abc.Mapping): if isinstance(obj, compat_collections_abc.Mapping):
result = obj.values() result = obj.values()
elif is_iterable_like(obj): elif is_iterable_like(obj, (compat_collections_abc.Iterable, compat_etree_Element)):
result = obj result = obj
elif isinstance(obj, compat_re_Match): elif isinstance(obj, compat_re_Match):
result = obj.groups() result = obj.groups()
@ -6376,9 +6382,11 @@ def traverse_obj(obj, *paths, **kwargs):
elif callable(key): elif callable(key):
branching = True branching = True
if isinstance(obj, compat_http_cookies.Morsel):
obj = dict(obj, key=obj.key, value=obj.value)
if isinstance(obj, compat_collections_abc.Mapping): if isinstance(obj, compat_collections_abc.Mapping):
iter_obj = obj.items() iter_obj = obj.items()
elif is_iterable_like(obj): elif is_iterable_like(obj, (compat_collections_abc.Iterable, compat_etree_Element)):
iter_obj = enumerate(obj) iter_obj = enumerate(obj)
elif isinstance(obj, compat_re_Match): elif isinstance(obj, compat_re_Match):
iter_obj = itertools.chain( iter_obj = itertools.chain(
@ -6400,6 +6408,8 @@ def traverse_obj(obj, *paths, **kwargs):
if v is not None or default is not NO_DEFAULT) or None if v is not None or default is not NO_DEFAULT) or None
elif isinstance(obj, compat_collections_abc.Mapping): elif isinstance(obj, compat_collections_abc.Mapping):
if isinstance(obj, compat_http_cookies.Morsel):
obj = dict(obj, key=obj.key, value=obj.value)
result = (try_call(obj.get, args=(key,)) result = (try_call(obj.get, args=(key,))
if casesense or try_call(obj.__contains__, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,))
else next((v for k, v in obj.items() if casefold(k) == key), None)) else next((v for k, v in obj.items() if casefold(k) == key), None))
@ -6417,12 +6427,40 @@ def traverse_obj(obj, *paths, **kwargs):
else: else:
result = None result = None
if isinstance(key, (int, slice)): if isinstance(key, (int, slice)):
if is_iterable_like(obj, compat_collections_abc.Sequence): if is_iterable_like(obj, (compat_collections_abc.Sequence, compat_etree_Element)):
branching = isinstance(key, slice) branching = isinstance(key, slice)
result = lookup_or_none(obj, key) result = lookup_or_none(obj, key)
elif _traverse_string: elif _traverse_string:
result = lookup_or_none(str(obj), key) result = lookup_or_none(str(obj), key)
elif isinstance(obj, compat_etree_Element) and isinstance(key, str):
xpath, _, special = key.rpartition('/')
if not special.startswith('@') and not special.endswith('()'):
xpath = key
special = None
# Allow abbreviations of relative paths, absolute paths error
if xpath.startswith('/'):
xpath = '.' + xpath
elif xpath and not xpath.startswith('./'):
xpath = './' + xpath
def apply_specials(element):
if special is None:
return element
if special == '@':
return element.attrib
if special.startswith('@'):
return try_call(element.attrib.get, args=(special[1:],))
if special == 'text()':
return element.text
raise SyntaxError('apply_specials is missing case for {0!r}'.format(special))
if xpath:
result = list(map(apply_specials, compat_etree_iterfind(obj, xpath)))
else:
result = apply_specials(obj)
return branching, result if branching else (result,) return branching, result if branching else (result,)
def lazy_last(iterable): def lazy_last(iterable):
@ -6443,17 +6481,18 @@ def traverse_obj(obj, *paths, **kwargs):
key = None key = None
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
if _is_user_input and isinstance(key, str):
if key == ':':
key = Ellipsis
elif ':' in key:
key = slice(*map(int_or_none, key.split(':')))
elif int_or_none(key) is not None:
key = int(key)
if not casesense and isinstance(key, str): if not casesense and isinstance(key, str):
key = compat_casefold(key) key = compat_casefold(key)
if key in (any, all):
has_branched = False
filtered_objs = (obj for obj in objs if obj not in (None, {}))
if key is any:
objs = (next(filtered_objs, None),)
else:
objs = (list(filtered_objs),)
continue
if __debug__ and callable(key): if __debug__ and callable(key):
# Verify function signature # Verify function signature
_try_bind_args(key, None, None) _try_bind_args(key, None, None)
@ -6492,9 +6531,9 @@ def traverse_obj(obj, *paths, **kwargs):
return None if default is NO_DEFAULT else default return None if default is NO_DEFAULT else default
def T(x): def T(*x):
""" For use in yt-dl instead of {type} or set((type,)) """ """ For use in yt-dl instead of {type, ...} or set((type, ...)) """
return set((x,)) return set(x)
def get_first(obj, keys, **kwargs): def get_first(obj, keys, **kwargs):
@ -6510,3 +6549,169 @@ def join_nonempty(*values, **kwargs):
if from_dict is not None: if from_dict is not None:
values = (traverse_obj(from_dict, variadic(v)) for v in values) values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(compat_str, filter(None, values))) return delim.join(map(compat_str, filter(None, values)))
class Namespace(object):
"""Immutable namespace"""
def __init__(self, **kw_attr):
self.__dict__.update(kw_attr)
def __iter__(self):
return iter(self.__dict__.values())
@property
def items_(self):
return self.__dict__.items()
MEDIA_EXTENSIONS = Namespace(
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
thumbnails=('jpg', 'png', 'webp'),
# storyboards=('mhtml', ),
subtitles=('srt', 'vtt', 'ass', 'lrc', 'ttml'),
manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
MEDIA_EXTENSIONS.video = MEDIA_EXTENSIONS.common_video + MEDIA_EXTENSIONS.video
MEDIA_EXTENSIONS.audio = MEDIA_EXTENSIONS.common_audio + MEDIA_EXTENSIONS.audio
KNOWN_EXTENSIONS = (
MEDIA_EXTENSIONS.video + MEDIA_EXTENSIONS.audio
+ MEDIA_EXTENSIONS.manifests
)
class _UnsafeExtensionError(Exception):
"""
Mitigation exception for unwanted file overwrite/path traversal
Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
"""
_ALLOWED_EXTENSIONS = frozenset(itertools.chain(
( # internal
'description',
'json',
'meta',
'orig',
'part',
'temp',
'uncut',
'unknown_video',
'ytdl',
),
# video
MEDIA_EXTENSIONS.video, (
'asx',
'ismv',
'm2t',
'm2ts',
'm2v',
'm4s',
'mng',
'mp2v',
'mp4v',
'mpe',
'mpeg',
'mpeg1',
'mpeg2',
'mpeg4',
'mxf',
'ogm',
'qt',
'rm',
'swf',
'ts',
'vob',
'vp9',
),
# audio
MEDIA_EXTENSIONS.audio, (
'3ga',
'ac3',
'adts',
'aif',
'au',
'dts',
'isma',
'it',
'mid',
'mod',
'mpga',
'mp1',
'mp2',
'mp4a',
'mpa',
'ra',
'shn',
'xm',
),
# image
MEDIA_EXTENSIONS.thumbnails, (
'avif',
'bmp',
'gif',
'ico',
'heic',
'jng',
'jpeg',
'jxl',
'svg',
'tif',
'tiff',
'wbmp',
),
# subtitle
MEDIA_EXTENSIONS.subtitles, (
'dfxp',
'fs',
'ismt',
'json3',
'sami',
'scc',
'srv1',
'srv2',
'srv3',
'ssa',
'tt',
'xml',
),
# others
MEDIA_EXTENSIONS.manifests,
(
# not used in yt-dl
# *MEDIA_EXTENSIONS.storyboards,
# 'desktop',
# 'ism',
# 'm3u',
# 'sbv',
# 'swp',
# 'url',
# 'webloc',
)))
def __init__(self, extension):
super(_UnsafeExtensionError, self).__init__('unsafe file extension: {0!r}'.format(extension))
self.extension = extension
# support --no-check-extensions
lenient = False
@classmethod
def sanitize_extension(cls, extension, **kwargs):
# ... /, *, prepend=False
prepend = kwargs.get('prepend', False)
if '/' in extension or '\\' in extension:
raise cls(extension)
if not prepend:
last = extension.rpartition('.')[-1]
if last == 'bin':
extension = last = 'unknown_video'
if not (cls.lenient or last.lower() in cls._ALLOWED_EXTENSIONS):
raise cls(extension)
return extension