mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-01 22:08:36 +09:00
Compare commits
5 Commits
master
...
pull/30733
Author | SHA1 | Date | |
---|---|---|---|
![]() |
e5bfed6c77 | ||
![]() |
38fce984f4 | ||
![]() |
49093c09c0 | ||
![]() |
f3cf092584 | ||
![]() |
34e1010545 |
17
.github/workflows/ci.yml
vendored
17
.github/workflows/ci.yml
vendored
@@ -116,29 +116,29 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-22.04]
|
||||
os: [ubuntu-20.04]
|
||||
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
|
||||
python-impl: [cpython]
|
||||
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
- os: windows-2022
|
||||
- os: windows-2019
|
||||
python-version: 3.4
|
||||
python-impl: cpython
|
||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||
run-tests-ext: bat
|
||||
- os: windows-2022
|
||||
- os: windows-2019
|
||||
python-version: 3.4
|
||||
python-impl: cpython
|
||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||
run-tests-ext: bat
|
||||
# jython
|
||||
- os: ubuntu-22.04
|
||||
- os: ubuntu-20.04
|
||||
python-version: 2.7
|
||||
python-impl: jython
|
||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
|
||||
run-tests-ext: sh
|
||||
- os: ubuntu-22.04
|
||||
- os: ubuntu-20.04
|
||||
python-version: 2.7
|
||||
python-impl: jython
|
||||
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
|
||||
@@ -160,7 +160,7 @@ jobs:
|
||||
# NB may run apt-get install in Linux
|
||||
uses: ytdl-org/setup-python@v1
|
||||
env:
|
||||
# Temporary (?) workaround for Python 3.5 failures - May 2024
|
||||
# Temporary workaround for Python 3.5 failures - May 2024
|
||||
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
@@ -240,10 +240,7 @@ jobs:
|
||||
# install 2.7
|
||||
shell: bash
|
||||
run: |
|
||||
# Ubuntu 22.04 no longer has python-is-python2: fetch it
|
||||
curl -L "http://launchpadlibrarian.net/474693132/python-is-python2_2.7.17-4_all.deb" -o python-is-python2.deb
|
||||
sudo apt-get install -y python2
|
||||
sudo dpkg --force-breaks -i python-is-python2.deb
|
||||
sudo apt-get install -y python2 python-is-python2
|
||||
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
|
||||
#-------- Python 2.6 --
|
||||
- name: Set up Python 2.6 environment
|
||||
|
@@ -63,21 +63,9 @@ class TestCache(unittest.TestCase):
|
||||
obj = {'x': 1, 'y': ['ä', '\\a', True]}
|
||||
c.store('test_cache', 'k.', obj)
|
||||
self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
|
||||
new_version = '.'.join(('%0.2d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
|
||||
new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
|
||||
self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
|
||||
|
||||
def test_cache_clear(self):
|
||||
ydl = FakeYDL({
|
||||
'cachedir': self.test_dir,
|
||||
})
|
||||
c = Cache(ydl)
|
||||
c.store('test_cache', 'k.', 'kay')
|
||||
c.store('test_cache', 'l.', 'ell')
|
||||
self.assertEqual(c.load('test_cache', 'k.'), 'kay')
|
||||
c.clear('test_cache', 'k.')
|
||||
self.assertEqual(c.load('test_cache', 'k.'), None)
|
||||
self.assertEqual(c.load('test_cache', 'l.'), 'ell')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -7,14 +6,12 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
|
||||
from youtube_dl.compat import compat_str as str
|
||||
from youtube_dl.compat import compat_str
|
||||
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
|
||||
|
||||
NaN = object()
|
||||
@@ -22,7 +19,7 @@ NaN = object()
|
||||
|
||||
class TestJSInterpreter(unittest.TestCase):
|
||||
def _test(self, jsi_or_code, expected, func='f', args=()):
|
||||
if isinstance(jsi_or_code, str):
|
||||
if isinstance(jsi_or_code, compat_str):
|
||||
jsi_or_code = JSInterpreter(jsi_or_code)
|
||||
got = jsi_or_code.call_function(func, *args)
|
||||
if expected is NaN:
|
||||
@@ -43,27 +40,16 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return 42 + 7;}', 49)
|
||||
self._test('function f(){return 42 + undefined;}', NaN)
|
||||
self._test('function f(){return 42 + null;}', 42)
|
||||
self._test('function f(){return 1 + "";}', '1')
|
||||
self._test('function f(){return 42 + "7";}', '427')
|
||||
self._test('function f(){return false + true;}', 1)
|
||||
self._test('function f(){return "false" + true;}', 'falsetrue')
|
||||
self._test('function f(){return '
|
||||
'1 + "2" + [3,4] + {k: 56} + null + undefined + Infinity;}',
|
||||
'123,4[object Object]nullundefinedInfinity')
|
||||
|
||||
def test_sub(self):
|
||||
self._test('function f(){return 42 - 7;}', 35)
|
||||
self._test('function f(){return 42 - undefined;}', NaN)
|
||||
self._test('function f(){return 42 - null;}', 42)
|
||||
self._test('function f(){return 42 - "7";}', 35)
|
||||
self._test('function f(){return 42 - "spam";}', NaN)
|
||||
|
||||
def test_mul(self):
|
||||
self._test('function f(){return 42 * 7;}', 294)
|
||||
self._test('function f(){return 42 * undefined;}', NaN)
|
||||
self._test('function f(){return 42 * null;}', 0)
|
||||
self._test('function f(){return 42 * "7";}', 294)
|
||||
self._test('function f(){return 42 * "eggs";}', NaN)
|
||||
|
||||
def test_div(self):
|
||||
jsi = JSInterpreter('function f(a, b){return a / b;}')
|
||||
@@ -71,26 +57,17 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, NaN, args=(JS_Undefined, 1))
|
||||
self._test(jsi, float('inf'), args=(2, 0))
|
||||
self._test(jsi, 0, args=(0, 3))
|
||||
self._test(jsi, 6, args=(42, 7))
|
||||
self._test(jsi, 0, args=(42, float('inf')))
|
||||
self._test(jsi, 6, args=("42", 7))
|
||||
self._test(jsi, NaN, args=("spam", 7))
|
||||
|
||||
def test_mod(self):
|
||||
self._test('function f(){return 42 % 7;}', 0)
|
||||
self._test('function f(){return 42 % 0;}', NaN)
|
||||
self._test('function f(){return 42 % undefined;}', NaN)
|
||||
self._test('function f(){return 42 % "7";}', 0)
|
||||
self._test('function f(){return 42 % "beans";}', NaN)
|
||||
|
||||
def test_exp(self):
|
||||
self._test('function f(){return 42 ** 2;}', 1764)
|
||||
self._test('function f(){return 42 ** undefined;}', NaN)
|
||||
self._test('function f(){return 42 ** null;}', 1)
|
||||
self._test('function f(){return undefined ** 0;}', 1)
|
||||
self._test('function f(){return undefined ** 42;}', NaN)
|
||||
self._test('function f(){return 42 ** "2";}', 1764)
|
||||
self._test('function f(){return 42 ** "spam";}', NaN)
|
||||
|
||||
def test_calc(self):
|
||||
self._test('function f(a){return 2*a+1;}', 7, args=[3])
|
||||
@@ -112,60 +89,13 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return 19 & 21;}', 17)
|
||||
self._test('function f(){return 11 >> 2;}', 2)
|
||||
self._test('function f(){return []? 2+3: 4;}', 5)
|
||||
# equality
|
||||
self._test('function f(){return 1 == 1}', True)
|
||||
self._test('function f(){return 1 == 1.0}', True)
|
||||
self._test('function f(){return 1 == "1"}', True)
|
||||
self._test('function f(){return 1 == 2}', False)
|
||||
self._test('function f(){return 1 != "1"}', False)
|
||||
self._test('function f(){return 1 != 2}', True)
|
||||
self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
|
||||
self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
|
||||
self._test('function f(){return NaN == NaN}', False)
|
||||
self._test('function f(){return null == undefined}', True)
|
||||
self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
|
||||
# strict equality
|
||||
self._test('function f(){return 1 === 1}', True)
|
||||
self._test('function f(){return 1 === 1.0}', True)
|
||||
self._test('function f(){return 1 === "1"}', False)
|
||||
self._test('function f(){return 1 === 2}', False)
|
||||
self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
|
||||
self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
|
||||
self._test('function f(){return NaN === NaN}', False)
|
||||
self._test('function f(){return null === undefined}', False)
|
||||
self._test('function f(){return null === null}', True)
|
||||
self._test('function f(){return undefined === undefined}', True)
|
||||
self._test('function f(){return "uninterned" === "uninterned"}', True)
|
||||
self._test('function f(){return 1 === 1}', True)
|
||||
self._test('function f(){return 1 === "1"}', False)
|
||||
self._test('function f(){return 1 !== 1}', False)
|
||||
self._test('function f(){return 1 !== "1"}', True)
|
||||
# expressions
|
||||
self._test('function f(){return 0 && 1 || 2;}', 2)
|
||||
self._test('function f(){return 0 ?? 42;}', 0)
|
||||
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/32815
|
||||
self._test('function f(){return 0 - 7 * - 6;}', 42)
|
||||
|
||||
def test_bitwise_operators_typecast(self):
|
||||
# madness
|
||||
self._test('function f(){return null << 5}', 0)
|
||||
self._test('function f(){return undefined >> 5}', 0)
|
||||
self._test('function f(){return 42 << NaN}', 42)
|
||||
self._test('function f(){return 42 << Infinity}', 42)
|
||||
self._test('function f(){return 0.0 << null}', 0)
|
||||
self._test('function f(){return NaN << 42}', 0)
|
||||
self._test('function f(){return "21.9" << 1}', 42)
|
||||
self._test('function f(){return true << "5";}', 32)
|
||||
self._test('function f(){return true << true;}', 2)
|
||||
self._test('function f(){return "19" & "21.9";}', 17)
|
||||
self._test('function f(){return "19" & false;}', 0)
|
||||
self._test('function f(){return "11.0" >> "2.1";}', 2)
|
||||
self._test('function f(){return 5 ^ 9;}', 12)
|
||||
self._test('function f(){return 0.0 << NaN}', 0)
|
||||
self._test('function f(){return null << undefined}', 0)
|
||||
self._test('function f(){return 21 << 4294967297}', 42)
|
||||
|
||||
def test_array_access(self):
|
||||
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
||||
|
||||
@@ -180,8 +110,8 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
|
||||
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||
self._test('function f(){var x = 2; var y = ["a", "b"]; y[x%y["length"]]="z"; return y}', ['z', 'b'])
|
||||
|
||||
@unittest.skip('Not yet fully implemented')
|
||||
def test_comments(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@@ -200,15 +130,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
}
|
||||
''', 3)
|
||||
|
||||
self._test('''
|
||||
function f() {
|
||||
var x = ( /* 1 + */ 2 +
|
||||
/* 30 * 40 */
|
||||
50);
|
||||
return x;
|
||||
}
|
||||
''', 52)
|
||||
|
||||
def test_precedence(self):
|
||||
self._test('''
|
||||
function f() {
|
||||
@@ -230,34 +151,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
|
||||
# epoch 0
|
||||
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
|
||||
# undefined
|
||||
self._test(jsi, NaN, args=[JS_Undefined])
|
||||
# y,m,d, ... - may fail with older dates lacking DST data
|
||||
jsi = JSInterpreter(
|
||||
'function f() { return new Date(%s); }'
|
||||
% ('2024, 5, 29, 2, 52, 12, 42',))
|
||||
self._test(jsi, (
|
||||
1719625932042 # UK value
|
||||
+ (
|
||||
+ 3600 # back to GMT
|
||||
+ (time.altzone if time.daylight # host's DST
|
||||
else time.timezone)
|
||||
) * 1000))
|
||||
# no arg
|
||||
self.assertAlmostEqual(JSInterpreter(
|
||||
'function f() { return new Date() - 0; }').call_function('f'),
|
||||
time.time() * 1000, delta=100)
|
||||
# Date.now()
|
||||
self.assertAlmostEqual(JSInterpreter(
|
||||
'function f() { return Date.now(); }').call_function('f'),
|
||||
time.time() * 1000, delta=100)
|
||||
# Date.parse()
|
||||
jsi = JSInterpreter('function f(dt) { return Date.parse(dt); }')
|
||||
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
|
||||
# Date.UTC()
|
||||
jsi = JSInterpreter('function f() { return Date.UTC(%s); }'
|
||||
% ('1970, 0, 1, 0, 0, 0, 0',))
|
||||
self._test(jsi, 0)
|
||||
|
||||
def test_call(self):
|
||||
jsi = JSInterpreter('''
|
||||
@@ -372,28 +265,8 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
|
||||
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
|
||||
|
||||
def test_not(self):
|
||||
self._test('function f() { return ! undefined; }', True)
|
||||
self._test('function f() { return !0; }', True)
|
||||
self._test('function f() { return !!0; }', False)
|
||||
self._test('function f() { return ![]; }', False)
|
||||
self._test('function f() { return !0 !== false; }', True)
|
||||
|
||||
def test_void(self):
|
||||
self._test('function f() { return void 42; }', JS_Undefined)
|
||||
|
||||
def test_typeof(self):
|
||||
self._test('function f() { return typeof undefined; }', 'undefined')
|
||||
self._test('function f() { return typeof NaN; }', 'number')
|
||||
self._test('function f() { return typeof Infinity; }', 'number')
|
||||
self._test('function f() { return typeof true; }', 'boolean')
|
||||
self._test('function f() { return typeof null; }', 'object')
|
||||
self._test('function f() { return typeof "a string"; }', 'string')
|
||||
self._test('function f() { return typeof 42; }', 'number')
|
||||
self._test('function f() { return typeof 42.42; }', 'number')
|
||||
self._test('function f() { var g = function(){}; return typeof g; }', 'function')
|
||||
self._test('function f() { return typeof {key: "value"}; }', 'object')
|
||||
# not yet implemented: Symbol, BigInt
|
||||
self._test('function f() { return void 42; }', None)
|
||||
|
||||
def test_return_function(self):
|
||||
jsi = JSInterpreter('''
|
||||
@@ -410,7 +283,7 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
def test_undefined(self):
|
||||
self._test('function f() { return undefined === undefined; }', True)
|
||||
self._test('function f() { return undefined; }', JS_Undefined)
|
||||
self._test('function f() { return undefined ?? 42; }', 42)
|
||||
self._test('function f() {return undefined ?? 42; }', 42)
|
||||
self._test('function f() { let v; return v; }', JS_Undefined)
|
||||
self._test('function f() { let v; return v**0; }', 1)
|
||||
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
|
||||
@@ -451,19 +324,8 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
|
||||
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
|
||||
|
||||
def test_indexing(self):
|
||||
self._test('function f() { return [1, 2, 3, 4][3]}', 4)
|
||||
self._test('function f() { return [1, [2, [3, [4]]]][1][1][1][0]}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o[3]}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o["3"]}', 4)
|
||||
self._test('function f() { return [1, [2, {3: [4]}]][1][1]["3"][0]}', 4)
|
||||
self._test('function f() { return [1, 2, 3, 4].length}', 4)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; return o.length}', JS_Undefined)
|
||||
self._test('function f() { var o = {1: 2, 3: 4}; o["length"] = 42; return o.length}', 42)
|
||||
|
||||
def test_regex(self):
|
||||
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||
self._test('function f() { let a=/,,[/,913,/](,)}/; return a.source; }', ',,[/,913,/](,)}')
|
||||
|
||||
jsi = JSInterpreter('''
|
||||
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
||||
@@ -511,6 +373,13 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return -524999584 << 5}', 379882496)
|
||||
self._test('function f(){return 1236566549 << 5}', 915423904)
|
||||
|
||||
def test_bitwise_operators_typecast(self):
|
||||
# madness
|
||||
self._test('function f(){return null << 5}', 0)
|
||||
self._test('function f(){return undefined >> 5}', 0)
|
||||
self._test('function f(){return 42 << NaN}', 42)
|
||||
self._test('function f(){return 42 << Infinity}', 42)
|
||||
|
||||
def test_negative(self):
|
||||
self._test('function f(){return 2 * -2.0 ;}', -4)
|
||||
self._test('function f(){return 2 - - -2 ;}', 0)
|
||||
@@ -542,19 +411,10 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||
self._test(jsi, '', args=[[], '-'])
|
||||
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
|
||||
'1,1,abc,[object Object],,,Infinity,NaN')
|
||||
self._test('function f(){return '
|
||||
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
|
||||
'1~1~abc~[object Object]~~~Infinity~NaN')
|
||||
|
||||
def test_split(self):
|
||||
test_result = list('test')
|
||||
tests = [
|
||||
'function f(a, b){return a.split(b)}',
|
||||
'function f(a, b){return a["split"](b)}',
|
||||
'function f(a, b){let x = ["split"]; return a[x[0]](b)}',
|
||||
'function f(a, b){return String.prototype.split.call(a, b)}',
|
||||
'function f(a, b){return String.prototype.split.apply(a, [b])}',
|
||||
]
|
||||
@@ -564,18 +424,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||
self._test(jsi, [''], args=['', '-'])
|
||||
self._test(jsi, [], args=['', ''])
|
||||
# RegExp split
|
||||
self._test('function f(){return "test".split(/(?:)/)}',
|
||||
['t', 'e', 's', 't'])
|
||||
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
|
||||
['t', 't'])
|
||||
# from MDN: surrogate pairs aren't handled: case 1 fails
|
||||
# self._test('function f(){return "😄😄".split(/(?:)/)}',
|
||||
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
|
||||
# case 2 beats Py3.2: it gets the case 1 result
|
||||
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
|
||||
self._test('function f(){return "😄😄".split(/(?:)/u)}',
|
||||
['😄', '😄'])
|
||||
|
||||
def test_slice(self):
|
||||
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||||
@@ -605,53 +453,6 @@ class TestJSInterpreter(unittest.TestCase):
|
||||
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||
|
||||
def test_splice(self):
|
||||
self._test('function f(){var T = ["0", "1", "2"]; T["splice"](2, 1, "0")[0]; return T }', ['0', '1', '0'])
|
||||
|
||||
def test_pop(self):
|
||||
# pop
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
|
||||
[8, [0, 1, 2, 3, 4, 5, 6, 7]])
|
||||
self._test('function f(){return [].pop()}', JS_Undefined)
|
||||
# push
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
|
||||
[5, [0, 1, 2, 3, 4]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_shift(self):
|
||||
# shift
|
||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
|
||||
[0, [1, 2, 3, 4, 5, 6, 7, 8]])
|
||||
self._test('function f(){return [].shift()}', JS_Undefined)
|
||||
# unshift
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
|
||||
[5, [3, 4, 0, 1, 2]])
|
||||
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
|
||||
[3, [0, 1, 2]])
|
||||
|
||||
def test_forEach(self):
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){ret.push([e,i,a]);}; '
|
||||
'l.forEach(log); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
self._test('function f(){var ret = []; var l = [4, 2]; '
|
||||
'var log = function(e,i,a){this.push([e,i,a]);}; '
|
||||
'l.forEach(log, ret); '
|
||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||
[2, 4, 1, [4, 2]])
|
||||
|
||||
def test_extract_function(self):
|
||||
jsi = JSInterpreter('function a(b) { return b + 1; }')
|
||||
func = jsi.extract_function('a')
|
||||
self.assertEqual(func([2]), 3)
|
||||
|
||||
def test_extract_function_with_global_stack(self):
|
||||
jsi = JSInterpreter('function c(d) { return d + e + f + g; }')
|
||||
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
|
||||
self.assertEqual(func([1]), 1111)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -13,7 +12,6 @@ import re
|
||||
import string
|
||||
|
||||
from youtube_dl.compat import (
|
||||
compat_contextlib_suppress,
|
||||
compat_open as open,
|
||||
compat_str,
|
||||
compat_urlretrieve,
|
||||
@@ -52,93 +50,23 @@ _SIG_TESTS = [
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
84,
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@@ -208,16 +136,12 @@ _NSIG_TESTS = [
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
|
||||
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
|
||||
'-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
|
||||
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
|
||||
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||
@@ -228,11 +152,7 @@ _NSIG_TESTS = [
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
|
||||
'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
|
||||
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
|
||||
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||
@@ -262,94 +182,6 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
|
||||
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
|
||||
'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f6e09c70/player_ias.vflset/en_US/base.js',
|
||||
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
|
||||
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
|
||||
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
|
||||
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
|
||||
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
||||
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||
'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/aa3fc80b/player_ias.vflset/en_US/base.js',
|
||||
'0qY9dal2uzOnOGwa-48hha', 'VSh1KDfQMk-eag',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@@ -362,8 +194,6 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
|
||||
('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
|
||||
# obsolete
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
||||
@@ -373,9 +203,8 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
||||
)
|
||||
ie = YoutubeIE(FakeYDL({'cachedir': False}))
|
||||
for player_url, expected_player_id in PLAYER_URLS:
|
||||
player_id = ie._extract_player_info(player_url)
|
||||
player_id = YoutubeIE._extract_player_info(player_url)
|
||||
self.assertEqual(player_id, expected_player_id)
|
||||
|
||||
|
||||
@@ -387,19 +216,21 @@ class TestSignature(unittest.TestCase):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
def tearDown(self):
|
||||
with compat_contextlib_suppress(OSError):
|
||||
try:
|
||||
for f in os.listdir(self.TESTDATA_DIR):
|
||||
os.remove(f)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def t_factory(name, sig_func, url_pattern):
|
||||
def make_tfunc(url, sig_input, expected_sig):
|
||||
m = url_pattern.match(url)
|
||||
assert m, '{0!r} should follow URL format'.format(url)
|
||||
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
|
||||
assert m, '%r should follow URL format' % url
|
||||
test_id = m.group('id')
|
||||
|
||||
def test_func(self):
|
||||
basename = 'player-{0}.js'.format(test_id)
|
||||
basename = 'player-{0}-{1}.js'.format(name, test_id)
|
||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||
|
||||
if not os.path.exists(fn):
|
||||
@@ -414,7 +245,7 @@ def t_factory(name, sig_func, url_pattern):
|
||||
|
||||
|
||||
def signature(jscode, sig_input):
|
||||
func = YoutubeIE(FakeYDL({'cachedir': False}))._parse_sig_js(jscode)
|
||||
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
|
||||
src_sig = (
|
||||
compat_str(string.printable[:sig_input])
|
||||
if isinstance(sig_input, int) else sig_input)
|
||||
@@ -422,23 +253,17 @@ def signature(jscode, sig_input):
|
||||
|
||||
|
||||
def n_sig(jscode, sig_input):
|
||||
ie = YoutubeIE(FakeYDL({'cachedir': False}))
|
||||
jsi = JSInterpreter(jscode)
|
||||
jsi, _, func_code = ie._extract_n_function_code_jsi(sig_input, jsi)
|
||||
return ie._extract_n_function_from_code(jsi, func_code)(sig_input)
|
||||
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
|
||||
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||
|
||||
|
||||
make_sig_test = t_factory(
|
||||
'signature', signature,
|
||||
re.compile(r'''(?x)
|
||||
.+/(?P<h5>html5)?player(?(h5)(?:-en_US)?-|/)(?P<id>[a-zA-Z0-9/._-]+)
|
||||
(?(h5)/(?:watch_as3|html5player))?\.js$
|
||||
'''))
|
||||
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
|
||||
for test_spec in _SIG_TESTS:
|
||||
make_sig_test(*test_spec)
|
||||
|
||||
make_nsig_test = t_factory(
|
||||
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
|
||||
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
|
||||
for test_spec in _NSIG_TESTS:
|
||||
make_nsig_test(*test_spec)
|
||||
|
||||
|
@@ -540,14 +540,10 @@ class YoutubeDL(object):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
return self.to_stdout(message, skip_eol, check_quiet=True)
|
||||
|
||||
def _write_string(self, s, out=None, only_once=False, _cache=set()):
|
||||
if only_once and s in _cache:
|
||||
return
|
||||
def _write_string(self, s, out=None):
|
||||
write_string(s, out=out, encoding=self.params.get('encoding'))
|
||||
if only_once:
|
||||
_cache.add(s)
|
||||
|
||||
def to_stdout(self, message, skip_eol=False, check_quiet=False, only_once=False):
|
||||
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
||||
"""Print message to stdout if not in quiet mode."""
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].debug(message)
|
||||
@@ -556,9 +552,9 @@ class YoutubeDL(object):
|
||||
terminator = ['\n', ''][skip_eol]
|
||||
output = message + terminator
|
||||
|
||||
self._write_string(output, self._screen_file, only_once=only_once)
|
||||
self._write_string(output, self._screen_file)
|
||||
|
||||
def to_stderr(self, message, only_once=False):
|
||||
def to_stderr(self, message):
|
||||
"""Print message to stderr."""
|
||||
assert isinstance(message, compat_str)
|
||||
if self.params.get('logger'):
|
||||
@@ -566,7 +562,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
message = self._bidi_workaround(message)
|
||||
output = message + '\n'
|
||||
self._write_string(output, self._err_file, only_once=only_once)
|
||||
self._write_string(output, self._err_file)
|
||||
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
@@ -645,11 +641,18 @@ class YoutubeDL(object):
|
||||
raise DownloadError(message, exc_info)
|
||||
self._download_retcode = 1
|
||||
|
||||
def report_warning(self, message, only_once=False):
|
||||
def report_warning(self, message, only_once=False, _cache={}):
|
||||
'''
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
if only_once:
|
||||
m_hash = hash((self, message))
|
||||
m_cnt = _cache.setdefault(m_hash, 0)
|
||||
_cache[m_hash] = m_cnt + 1
|
||||
if m_cnt > 0:
|
||||
return
|
||||
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(message)
|
||||
else:
|
||||
@@ -660,7 +663,7 @@ class YoutubeDL(object):
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
warning_message = '%s %s' % (_msg_header, message)
|
||||
self.to_stderr(warning_message, only_once=only_once)
|
||||
self.to_stderr(warning_message)
|
||||
|
||||
def report_error(self, message, *args, **kwargs):
|
||||
'''
|
||||
@@ -674,16 +677,6 @@ class YoutubeDL(object):
|
||||
kwargs['message'] = '%s %s' % (_msg_header, message)
|
||||
self.trouble(*args, **kwargs)
|
||||
|
||||
def write_debug(self, message, only_once=False):
|
||||
'''Log debug message or Print message to stderr'''
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
message = '[debug] {0}'.format(message)
|
||||
if self.params.get('logger'):
|
||||
self.params['logger'].debug(message)
|
||||
else:
|
||||
self.to_stderr(message, only_once)
|
||||
|
||||
def report_unscoped_cookies(self, *args, **kwargs):
|
||||
# message=None, tb=False, is_error=False
|
||||
if len(args) <= 2:
|
||||
@@ -2521,7 +2514,7 @@ class YoutubeDL(object):
|
||||
self.get_encoding()))
|
||||
write_string(encoding_str, encoding=None)
|
||||
|
||||
writeln_debug = lambda *s: self.write_debug(''.join(s))
|
||||
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
|
||||
writeln_debug('youtube-dl version ', __version__)
|
||||
if _LAZY_LOADER:
|
||||
writeln_debug('Lazy loading extractors enabled')
|
||||
|
@@ -18,7 +18,7 @@ from .compat import (
|
||||
compat_getpass,
|
||||
compat_register_utf8,
|
||||
compat_shlex_split,
|
||||
_workaround_optparse_bug9161,
|
||||
workaround_optparse_bug9161,
|
||||
)
|
||||
from .utils import (
|
||||
_UnsafeExtensionError,
|
||||
@@ -50,7 +50,7 @@ def _real_main(argv=None):
|
||||
# Compatibility fix for Windows
|
||||
compat_register_utf8()
|
||||
|
||||
_workaround_optparse_bug9161()
|
||||
workaround_optparse_bug9161()
|
||||
|
||||
setproctitle('youtube-dl')
|
||||
|
||||
@@ -409,8 +409,6 @@ def _real_main(argv=None):
|
||||
'include_ads': opts.include_ads,
|
||||
'default_search': opts.default_search,
|
||||
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
|
||||
'youtube_player_js_version': opts.youtube_player_js_version,
|
||||
'youtube_player_js_variant': opts.youtube_player_js_variant,
|
||||
'encoding': opts.encoding,
|
||||
'extract_flat': opts.extract_flat,
|
||||
'mark_watched': opts.mark_watched,
|
||||
|
@@ -1,4 +1,3 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import errno
|
||||
@@ -11,14 +10,12 @@ import traceback
|
||||
from .compat import (
|
||||
compat_getenv,
|
||||
compat_open as open,
|
||||
compat_os_makedirs,
|
||||
)
|
||||
from .utils import (
|
||||
error_to_compat_str,
|
||||
escape_rfc3986,
|
||||
expand_path,
|
||||
is_outdated_version,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
write_json_file,
|
||||
)
|
||||
from .version import __version__
|
||||
@@ -33,35 +30,23 @@ class Cache(object):
|
||||
def __init__(self, ydl):
|
||||
self._ydl = ydl
|
||||
|
||||
def _write_debug(self, *args, **kwargs):
|
||||
self._ydl.write_debug(*args, **kwargs)
|
||||
|
||||
def _report_warning(self, *args, **kwargs):
|
||||
self._ydl.report_warning(*args, **kwargs)
|
||||
|
||||
def _to_screen(self, *args, **kwargs):
|
||||
self._ydl.to_screen(*args, **kwargs)
|
||||
|
||||
def _get_param(self, k, default=None):
|
||||
return self._ydl.params.get(k, default)
|
||||
|
||||
def _get_root_dir(self):
|
||||
res = self._get_param('cachedir')
|
||||
res = self._ydl.params.get('cachedir')
|
||||
if res is None:
|
||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||
res = os.path.join(cache_root, self._YTDL_DIR)
|
||||
return expand_path(res)
|
||||
|
||||
def _get_cache_fn(self, section, key, dtype):
|
||||
assert re.match(r'^[\w.-]+$', section), \
|
||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
||||
'invalid section %r' % section
|
||||
key = escape_rfc3986(key, safe='').replace('%', ',') # encode non-ascii characters
|
||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
|
||||
return os.path.join(
|
||||
self._get_root_dir(), section, '%s.%s' % (key, dtype))
|
||||
|
||||
@property
|
||||
def enabled(self):
|
||||
return self._get_param('cachedir') is not False
|
||||
return self._ydl.params.get('cachedir') is not False
|
||||
|
||||
def store(self, section, key, data, dtype='json'):
|
||||
assert dtype in ('json',)
|
||||
@@ -71,75 +56,61 @@ class Cache(object):
|
||||
|
||||
fn = self._get_cache_fn(section, key, dtype)
|
||||
try:
|
||||
compat_os_makedirs(os.path.dirname(fn), exist_ok=True)
|
||||
self._write_debug('Saving {section}.{key} to cache'.format(section=section, key=key))
|
||||
try:
|
||||
os.makedirs(os.path.dirname(fn))
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
self._report_warning('Writing cache to {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
|
||||
|
||||
def clear(self, section, key, dtype='json'):
|
||||
|
||||
if not self.enabled:
|
||||
return
|
||||
|
||||
fn = self._get_cache_fn(section, key, dtype)
|
||||
self._write_debug('Clearing {section}.{key} from cache'.format(section=section, key=key))
|
||||
try:
|
||||
os.remove(fn)
|
||||
except Exception as e:
|
||||
if getattr(e, 'errno') == errno.ENOENT:
|
||||
# file not found
|
||||
return
|
||||
tb = traceback.format_exc()
|
||||
self._report_warning('Clearing cache from {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
|
||||
self._ydl.report_warning(
|
||||
'Writing cache to %r failed: %s' % (fn, tb))
|
||||
|
||||
def _validate(self, data, min_ver):
|
||||
version = traverse_obj(data, self._VERSION_KEY)
|
||||
version = try_get(data, lambda x: x[self._VERSION_KEY])
|
||||
if not version: # Backward compatibility
|
||||
data, version = {'data': data}, self._DEFAULT_VERSION
|
||||
if not is_outdated_version(version, min_ver or '0', assume_new=False):
|
||||
return data['data']
|
||||
self._write_debug('Discarding old cache from version {version} (needs {min_ver})'.format(version=version, min_ver=min_ver))
|
||||
self._ydl.to_screen(
|
||||
'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
|
||||
|
||||
def load(self, section, key, dtype='json', default=None, **kw_min_ver):
|
||||
def load(self, section, key, dtype='json', default=None, min_ver=None):
|
||||
assert dtype in ('json',)
|
||||
min_ver = kw_min_ver.get('min_ver')
|
||||
|
||||
if not self.enabled:
|
||||
return default
|
||||
|
||||
cache_fn = self._get_cache_fn(section, key, dtype)
|
||||
try:
|
||||
with open(cache_fn, encoding='utf-8') as cachef:
|
||||
self._write_debug('Loading {section}.{key} from cache'.format(section=section, key=key), only_once=True)
|
||||
return self._validate(json.load(cachef), min_ver)
|
||||
except (ValueError, KeyError):
|
||||
try:
|
||||
file_size = 'size: %d' % os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = error_to_compat_str(oe)
|
||||
self._report_warning('Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||
except Exception as e:
|
||||
if getattr(e, 'errno') == errno.ENOENT:
|
||||
# no cache available
|
||||
return
|
||||
self._report_warning('Cache retrieval from %s failed' % (cache_fn,))
|
||||
with open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||
return self._validate(json.load(cachef), min_ver)
|
||||
except ValueError:
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = error_to_compat_str(oe)
|
||||
self._ydl.report_warning(
|
||||
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||
except IOError:
|
||||
pass # No cache available
|
||||
|
||||
return default
|
||||
|
||||
def remove(self):
|
||||
if not self.enabled:
|
||||
self._to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||
return
|
||||
|
||||
cachedir = self._get_root_dir()
|
||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % (cachedir,))
|
||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
||||
|
||||
self._to_screen(
|
||||
'Removing cache dir %s .' % (cachedir,), skip_eol=True, ),
|
||||
self._ydl.to_screen(
|
||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
||||
if os.path.exists(cachedir):
|
||||
self._to_screen('.', skip_eol=True)
|
||||
self._ydl.to_screen('.', skip_eol=True)
|
||||
shutil.rmtree(cachedir)
|
||||
self._to_screen('.')
|
||||
self._ydl.to_screen('.')
|
||||
|
@@ -10,10 +10,9 @@ from .compat import (
|
||||
# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt
|
||||
# In case newly foldable Unicode characters are defined, paste the new version
|
||||
# of the text inside the ''' marks.
|
||||
# The text is expected to have only blank lines and lines with 1st character #,
|
||||
# The text is expected to have only blank lines andlines with 1st character #,
|
||||
# all ignored, and fold definitions like this:
|
||||
# `from_hex_code; status; space_separated_to_hex_code_list; comment`
|
||||
# Only `status` C/F are used.
|
||||
# `from_hex_code; space_separated_to_hex_code_list; comment`
|
||||
|
||||
_map_str = '''
|
||||
# CaseFolding-15.0.0.txt
|
||||
@@ -1658,6 +1657,11 @@ _map = dict(
|
||||
del _map_str
|
||||
|
||||
|
||||
def _casefold(s):
|
||||
def casefold(s):
|
||||
assert isinstance(s, compat_str)
|
||||
return ''.join((_map.get(c, c) for c in s))
|
||||
|
||||
|
||||
__all__ = [
|
||||
'casefold',
|
||||
]
|
||||
|
@@ -16,6 +16,7 @@ import os
|
||||
import platform
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
import socket
|
||||
import struct
|
||||
import subprocess
|
||||
@@ -23,15 +24,11 @@ import sys
|
||||
import types
|
||||
import xml.etree.ElementTree
|
||||
|
||||
_IDENTITY = lambda x: x
|
||||
|
||||
# naming convention
|
||||
# 'compat_' + Python3_name.replace('.', '_')
|
||||
# other aliases exist for convenience and/or legacy
|
||||
# wrap disposable test values in type() to reclaim storage
|
||||
|
||||
# deal with critical unicode/str things first:
|
||||
# compat_str, compat_basestring, compat_chr
|
||||
# deal with critical unicode/str things first
|
||||
try:
|
||||
# Python 2
|
||||
compat_str, compat_basestring, compat_chr = (
|
||||
@@ -42,23 +39,18 @@ except NameError:
|
||||
str, (str, bytes), chr
|
||||
)
|
||||
|
||||
|
||||
# compat_casefold
|
||||
# casefold
|
||||
try:
|
||||
compat_str.casefold
|
||||
compat_casefold = lambda s: s.casefold()
|
||||
except AttributeError:
|
||||
from .casefold import _casefold as compat_casefold
|
||||
from .casefold import casefold as compat_casefold
|
||||
|
||||
|
||||
# compat_collections_abc
|
||||
try:
|
||||
import collections.abc as compat_collections_abc
|
||||
except ImportError:
|
||||
import collections as compat_collections_abc
|
||||
|
||||
|
||||
# compat_urllib_request
|
||||
try:
|
||||
import urllib.request as compat_urllib_request
|
||||
except ImportError: # Python 2
|
||||
@@ -87,15 +79,11 @@ except TypeError:
|
||||
_add_init_method_arg(compat_urllib_request.Request)
|
||||
del _add_init_method_arg
|
||||
|
||||
|
||||
# compat_urllib_error
|
||||
try:
|
||||
import urllib.error as compat_urllib_error
|
||||
except ImportError: # Python 2
|
||||
import urllib2 as compat_urllib_error
|
||||
|
||||
|
||||
# compat_urllib_parse
|
||||
try:
|
||||
import urllib.parse as compat_urllib_parse
|
||||
except ImportError: # Python 2
|
||||
@@ -110,23 +98,17 @@ except ImportError: # Python 2
|
||||
compat_urlparse = compat_urllib_parse
|
||||
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
|
||||
|
||||
|
||||
# compat_urllib_response
|
||||
try:
|
||||
import urllib.response as compat_urllib_response
|
||||
except ImportError: # Python 2
|
||||
import urllib as compat_urllib_response
|
||||
|
||||
|
||||
# compat_urllib_response.addinfourl
|
||||
try:
|
||||
compat_urllib_response.addinfourl.status
|
||||
except AttributeError:
|
||||
# .getcode() is deprecated in Py 3.
|
||||
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
|
||||
|
||||
|
||||
# compat_http_cookiejar
|
||||
try:
|
||||
import http.cookiejar as compat_cookiejar
|
||||
except ImportError: # Python 2
|
||||
@@ -145,16 +127,12 @@ else:
|
||||
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
|
||||
|
||||
|
||||
# compat_http_cookies
|
||||
try:
|
||||
import http.cookies as compat_cookies
|
||||
except ImportError: # Python 2
|
||||
import Cookie as compat_cookies
|
||||
compat_http_cookies = compat_cookies
|
||||
|
||||
|
||||
# compat_http_cookies_SimpleCookie
|
||||
if sys.version_info[0] == 2 or sys.version_info < (3, 3):
|
||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||
def load(self, rawdata):
|
||||
@@ -177,15 +155,11 @@ else:
|
||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
||||
|
||||
|
||||
# compat_html_entities, probably useless now
|
||||
try:
|
||||
import html.entities as compat_html_entities
|
||||
except ImportError: # Python 2
|
||||
import htmlentitydefs as compat_html_entities
|
||||
|
||||
|
||||
# compat_html_entities_html5
|
||||
try: # Python >= 3.3
|
||||
compat_html_entities_html5 = compat_html_entities.html5
|
||||
except AttributeError:
|
||||
@@ -2434,24 +2408,18 @@ except AttributeError:
|
||||
# Py < 3.1
|
||||
compat_http_client.HTTPResponse.getcode = lambda self: self.status
|
||||
|
||||
|
||||
# compat_urllib_HTTPError
|
||||
try:
|
||||
from urllib.error import HTTPError as compat_HTTPError
|
||||
except ImportError: # Python 2
|
||||
from urllib2 import HTTPError as compat_HTTPError
|
||||
compat_urllib_HTTPError = compat_HTTPError
|
||||
|
||||
|
||||
# compat_urllib_request_urlretrieve
|
||||
try:
|
||||
from urllib.request import urlretrieve as compat_urlretrieve
|
||||
except ImportError: # Python 2
|
||||
from urllib import urlretrieve as compat_urlretrieve
|
||||
compat_urllib_request_urlretrieve = compat_urlretrieve
|
||||
|
||||
|
||||
# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
|
||||
try:
|
||||
from HTMLParser import (
|
||||
HTMLParser as compat_HTMLParser,
|
||||
@@ -2464,33 +2432,22 @@ except ImportError: # Python 3
|
||||
# HTMLParseError was deprecated in Python 3.3 and removed in
|
||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||
# and uniform cross-version exception handling
|
||||
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
compat_html_parser_HTMLParser = compat_HTMLParser
|
||||
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||
|
||||
|
||||
# compat_subprocess_get_DEVNULL
|
||||
try:
|
||||
_DEVNULL = subprocess.DEVNULL
|
||||
compat_subprocess_get_DEVNULL = lambda: _DEVNULL
|
||||
except AttributeError:
|
||||
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
|
||||
|
||||
|
||||
# compat_http_server
|
||||
try:
|
||||
import http.server as compat_http_server
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
|
||||
# compat_urllib_parse_unquote_to_bytes,
|
||||
# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
|
||||
# compat_urllib_parse_urlencode,
|
||||
# compat_urllib_parse_parse_qs
|
||||
try:
|
||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
@@ -2498,7 +2455,8 @@ try:
|
||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
except ImportError: # Python 2
|
||||
_asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
|
||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||
else re.compile(r'([\x00-\x7f]+)'))
|
||||
|
||||
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
||||
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
||||
@@ -2566,21 +2524,24 @@ except ImportError: # Python 2
|
||||
# Possible solutions are to either port it from python 3 with all
|
||||
# the friends or manually ensure input query contains only byte strings.
|
||||
# We will stick with latter thus recursively encoding the whole query.
|
||||
def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
|
||||
|
||||
def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
|
||||
def encode_elem(e):
|
||||
if isinstance(e, dict):
|
||||
e = encode_dict(e)
|
||||
elif isinstance(e, (list, tuple,)):
|
||||
e = type(e)(encode_elem(el) for el in e)
|
||||
list_e = encode_list(e)
|
||||
e = tuple(list_e) if isinstance(e, tuple) else list_e
|
||||
elif isinstance(e, compat_str):
|
||||
e = e.encode(encoding, errors)
|
||||
e = e.encode(encoding)
|
||||
return e
|
||||
|
||||
def encode_dict(d):
|
||||
return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
|
||||
return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
|
||||
|
||||
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
|
||||
def encode_list(l):
|
||||
return [encode_elem(e) for e in l]
|
||||
|
||||
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
|
||||
|
||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||
# Python 2's version is apparently totally broken
|
||||
@@ -2635,61 +2596,8 @@ except ImportError: # Python 2
|
||||
('parse_qs', compat_parse_qs)):
|
||||
setattr(compat_urllib_parse, name, fix)
|
||||
|
||||
try:
|
||||
all(chr(i) in b'' for i in range(256))
|
||||
except TypeError:
|
||||
# not all chr(i) are str: patch Python2 quote
|
||||
|
||||
_safemaps = getattr(compat_urllib_parse, '_safemaps', {})
|
||||
_always_safe = frozenset(compat_urllib_parse.always_safe)
|
||||
|
||||
def _quote(s, safe='/'):
|
||||
"""quote('abc def') -> 'abc%20def'"""
|
||||
|
||||
if not s and s is not None: # fast path
|
||||
return s
|
||||
safe = frozenset(safe)
|
||||
cachekey = (safe, _always_safe)
|
||||
try:
|
||||
safe_map = _safemaps[cachekey]
|
||||
except KeyError:
|
||||
safe = _always_safe | safe
|
||||
safe_map = {}
|
||||
for i in range(256):
|
||||
c = chr(i)
|
||||
safe_map[c] = (
|
||||
c if (i < 128 and c in safe)
|
||||
else b'%{0:02X}'.format(i))
|
||||
_safemaps[cachekey] = safe_map
|
||||
|
||||
if safe.issuperset(s):
|
||||
return s
|
||||
return ''.join(safe_map[c] for c in s)
|
||||
|
||||
# linked code
|
||||
def _quote_plus(s, safe=''):
|
||||
return (
|
||||
_quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
|
||||
else _quote(s, safe))
|
||||
|
||||
# linked code
|
||||
def _urlcleanup():
|
||||
if compat_urllib_parse._urlopener:
|
||||
compat_urllib_parse._urlopener.cleanup()
|
||||
_safemaps.clear()
|
||||
compat_urllib_parse.ftpcache.clear()
|
||||
|
||||
for name, fix in (
|
||||
('quote', _quote),
|
||||
('quote_plus', _quote_plus),
|
||||
('urlcleanup', _urlcleanup)):
|
||||
setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
|
||||
setattr(compat_urllib_parse, name, fix)
|
||||
|
||||
compat_urllib_parse_parse_qs = compat_parse_qs
|
||||
|
||||
|
||||
# compat_urllib_request_DataHandler
|
||||
try:
|
||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||
except ImportError: # Python < 3.4
|
||||
@@ -2724,20 +2632,16 @@ except ImportError: # Python < 3.4
|
||||
|
||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
|
||||
# compat_xml_etree_ElementTree_ParseError
|
||||
try:
|
||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
|
||||
|
||||
|
||||
# compat_xml_etree_ElementTree_Element
|
||||
_etree = xml.etree.ElementTree
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
|
||||
class _TreeBuilder(_etree.TreeBuilder):
|
||||
class _TreeBuilder(etree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass
|
||||
|
||||
@@ -2746,7 +2650,7 @@ try:
|
||||
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||
# the following will crash with:
|
||||
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||
isinstance(None, _etree.Element)
|
||||
isinstance(None, etree.Element)
|
||||
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||
except TypeError: # Python <=2.6
|
||||
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||
@@ -2754,12 +2658,12 @@ compat_xml_etree_ElementTree_Element = compat_etree_Element
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
def compat_etree_fromstring(text):
|
||||
return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
else:
|
||||
# python 2.x tries to encode unicode strings with ascii (see the
|
||||
# XMLParser._fixtext method)
|
||||
try:
|
||||
_etree_iter = _etree.Element.iter
|
||||
_etree_iter = etree.Element.iter
|
||||
except AttributeError: # Python <=2.6
|
||||
def _etree_iter(root):
|
||||
for el in root.findall('*'):
|
||||
@@ -2771,29 +2675,27 @@ else:
|
||||
# 2.7 source
|
||||
def _XML(text, parser=None):
|
||||
if not parser:
|
||||
parser = _etree.XMLParser(target=_TreeBuilder())
|
||||
parser = etree.XMLParser(target=_TreeBuilder())
|
||||
parser.feed(text)
|
||||
return parser.close()
|
||||
|
||||
def _element_factory(*args, **kwargs):
|
||||
el = _etree.Element(*args, **kwargs)
|
||||
el = etree.Element(*args, **kwargs)
|
||||
for k, v in el.items():
|
||||
if isinstance(v, bytes):
|
||||
el.set(k, v.decode('utf-8'))
|
||||
return el
|
||||
|
||||
def compat_etree_fromstring(text):
|
||||
doc = _XML(text, parser=_etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
||||
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
||||
for el in _etree_iter(doc):
|
||||
if el.text is not None and isinstance(el.text, bytes):
|
||||
el.text = el.text.decode('utf-8')
|
||||
return doc
|
||||
|
||||
|
||||
# compat_xml_etree_register_namespace
|
||||
try:
|
||||
compat_etree_register_namespace = _etree.register_namespace
|
||||
except AttributeError:
|
||||
if hasattr(etree, 'register_namespace'):
|
||||
compat_etree_register_namespace = etree.register_namespace
|
||||
else:
|
||||
def compat_etree_register_namespace(prefix, uri):
|
||||
"""Register a namespace prefix.
|
||||
The registry is global, and any existing mapping for either the
|
||||
@@ -2802,16 +2704,14 @@ except AttributeError:
|
||||
attributes in this namespace will be serialized with prefix if possible.
|
||||
ValueError is raised if prefix is reserved or is invalid.
|
||||
"""
|
||||
if re.match(r'ns\d+$', prefix):
|
||||
raise ValueError('Prefix format reserved for internal use')
|
||||
for k, v in list(_etree._namespace_map.items()):
|
||||
if re.match(r"ns\d+$", prefix):
|
||||
raise ValueError("Prefix format reserved for internal use")
|
||||
for k, v in list(etree._namespace_map.items()):
|
||||
if k == uri or v == prefix:
|
||||
del _etree._namespace_map[k]
|
||||
_etree._namespace_map[uri] = prefix
|
||||
del etree._namespace_map[k]
|
||||
etree._namespace_map[uri] = prefix
|
||||
compat_xml_etree_register_namespace = compat_etree_register_namespace
|
||||
|
||||
|
||||
# compat_xpath, compat_etree_iterfind
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
# .//node does not match if a node is a direct child of . !
|
||||
@@ -2998,6 +2898,7 @@ if sys.version_info < (2, 7):
|
||||
def __init__(self, root):
|
||||
self.root = root
|
||||
|
||||
##
|
||||
# Generate all matching objects.
|
||||
|
||||
def compat_etree_iterfind(elem, path, namespaces=None):
|
||||
@@ -3032,15 +2933,13 @@ if sys.version_info < (2, 7):
|
||||
|
||||
|
||||
else:
|
||||
compat_xpath = lambda xpath: xpath
|
||||
compat_etree_iterfind = lambda element, match: element.iterfind(match)
|
||||
compat_xpath = _IDENTITY
|
||||
|
||||
|
||||
# compat_os_name
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
# compat_shlex_quote
|
||||
if compat_os_name == 'nt':
|
||||
def compat_shlex_quote(s):
|
||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||
@@ -3055,7 +2954,6 @@ else:
|
||||
return "'" + s.replace("'", "'\"'\"'") + "'"
|
||||
|
||||
|
||||
# compat_shlex.split
|
||||
try:
|
||||
args = shlex.split('中文')
|
||||
assert (isinstance(args, list)
|
||||
@@ -3071,7 +2969,6 @@ except (AssertionError, UnicodeEncodeError):
|
||||
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
|
||||
|
||||
|
||||
# compat_ord
|
||||
def compat_ord(c):
|
||||
if isinstance(c, int):
|
||||
return c
|
||||
@@ -3079,7 +2976,6 @@ def compat_ord(c):
|
||||
return ord(c)
|
||||
|
||||
|
||||
# compat_getenv, compat_os_path_expanduser, compat_setenv
|
||||
if sys.version_info >= (3, 0):
|
||||
compat_getenv = os.getenv
|
||||
compat_expanduser = os.path.expanduser
|
||||
@@ -3167,22 +3063,6 @@ else:
|
||||
compat_os_path_expanduser = compat_expanduser
|
||||
|
||||
|
||||
# compat_os_makedirs
|
||||
try:
|
||||
os.makedirs('.', exist_ok=True)
|
||||
compat_os_makedirs = os.makedirs
|
||||
except TypeError: # < Py3.2
|
||||
from errno import EEXIST as _errno_EEXIST
|
||||
|
||||
def compat_os_makedirs(name, mode=0o777, exist_ok=False):
|
||||
try:
|
||||
return os.makedirs(name, mode=mode)
|
||||
except OSError as ose:
|
||||
if not (exist_ok and ose.errno == _errno_EEXIST):
|
||||
raise
|
||||
|
||||
|
||||
# compat_os_path_realpath
|
||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
# os.path.realpath on Windows does not follow symbolic links
|
||||
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||
@@ -3196,7 +3076,6 @@ else:
|
||||
compat_os_path_realpath = compat_realpath
|
||||
|
||||
|
||||
# compat_print
|
||||
if sys.version_info < (3, 0):
|
||||
def compat_print(s):
|
||||
from .utils import preferredencoding
|
||||
@@ -3207,7 +3086,6 @@ else:
|
||||
print(s)
|
||||
|
||||
|
||||
# compat_getpass_getpass
|
||||
if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
def compat_getpass(prompt, *args, **kwargs):
|
||||
if isinstance(prompt, compat_str):
|
||||
@@ -3220,42 +3098,36 @@ else:
|
||||
compat_getpass_getpass = compat_getpass
|
||||
|
||||
|
||||
# compat_input
|
||||
try:
|
||||
compat_input = raw_input
|
||||
except NameError: # Python 3
|
||||
compat_input = input
|
||||
|
||||
|
||||
# compat_kwargs
|
||||
# Python < 2.6.5 require kwargs to be bytes
|
||||
try:
|
||||
(lambda x: x)(**{'x': 0})
|
||||
def _testfunc(x):
|
||||
pass
|
||||
_testfunc(**{'x': 0})
|
||||
except TypeError:
|
||||
def compat_kwargs(kwargs):
|
||||
return dict((bytes(k), v) for k, v in kwargs.items())
|
||||
else:
|
||||
compat_kwargs = _IDENTITY
|
||||
compat_kwargs = lambda kwargs: kwargs
|
||||
|
||||
|
||||
# compat_numeric_types
|
||||
try:
|
||||
compat_numeric_types = (int, float, long, complex)
|
||||
except NameError: # Python 3
|
||||
compat_numeric_types = (int, float, complex)
|
||||
|
||||
|
||||
# compat_integer_types
|
||||
try:
|
||||
compat_integer_types = (int, long)
|
||||
except NameError: # Python 3
|
||||
compat_integer_types = (int, )
|
||||
|
||||
# compat_int
|
||||
compat_int = compat_integer_types[-1]
|
||||
|
||||
|
||||
# compat_socket_create_connection
|
||||
if sys.version_info < (2, 7):
|
||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||
host, port = address
|
||||
@@ -3282,7 +3154,6 @@ else:
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
|
||||
|
||||
# compat_contextlib_suppress
|
||||
try:
|
||||
from contextlib import suppress as compat_contextlib_suppress
|
||||
except ImportError:
|
||||
@@ -3325,12 +3196,12 @@ except AttributeError:
|
||||
# repeated .close() is OK, but just in case
|
||||
with compat_contextlib_suppress(EnvironmentError):
|
||||
f.close()
|
||||
popen.wait()
|
||||
popen.wait()
|
||||
|
||||
|
||||
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
|
||||
# See http://bugs.python.org/issue9161 for what is broken
|
||||
def _workaround_optparse_bug9161():
|
||||
def workaround_optparse_bug9161():
|
||||
op = optparse.OptionParser()
|
||||
og = optparse.OptionGroup(op, 'foo')
|
||||
try:
|
||||
@@ -3349,10 +3220,9 @@ def _workaround_optparse_bug9161():
|
||||
optparse.OptionGroup.add_option = _compat_add_option
|
||||
|
||||
|
||||
# compat_shutil_get_terminal_size
|
||||
try:
|
||||
from shutil import get_terminal_size as compat_get_terminal_size # Python >= 3.3
|
||||
except ImportError:
|
||||
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
|
||||
compat_get_terminal_size = shutil.get_terminal_size
|
||||
else:
|
||||
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
||||
|
||||
def compat_get_terminal_size(fallback=(80, 24)):
|
||||
@@ -3382,33 +3252,27 @@ except ImportError:
|
||||
columns = _columns
|
||||
if lines is None or lines <= 0:
|
||||
lines = _lines
|
||||
|
||||
return _terminal_size(columns, lines)
|
||||
|
||||
compat_shutil_get_terminal_size = compat_get_terminal_size
|
||||
|
||||
|
||||
# compat_itertools_count
|
||||
try:
|
||||
type(itertools.count(start=0, step=1))
|
||||
itertools.count(start=0, step=1)
|
||||
compat_itertools_count = itertools.count
|
||||
except TypeError: # Python 2.6 lacks step
|
||||
except TypeError: # Python 2.6
|
||||
def compat_itertools_count(start=0, step=1):
|
||||
while True:
|
||||
yield start
|
||||
start += step
|
||||
|
||||
|
||||
# compat_tokenize_tokenize
|
||||
if sys.version_info >= (3, 0):
|
||||
from tokenize import tokenize as compat_tokenize_tokenize
|
||||
else:
|
||||
from tokenize import generate_tokens as compat_tokenize_tokenize
|
||||
|
||||
|
||||
# compat_struct_pack, compat_struct_unpack, compat_Struct
|
||||
try:
|
||||
type(struct.pack('!I', 0))
|
||||
struct.pack('!I', 0)
|
||||
except TypeError:
|
||||
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
|
||||
# See https://bugs.python.org/issue19099
|
||||
@@ -3440,10 +3304,8 @@ else:
|
||||
compat_Struct = struct.Struct
|
||||
|
||||
|
||||
# builtins returning an iterator
|
||||
|
||||
# compat_map, compat_filter
|
||||
# supposedly the same versioning as for zip below
|
||||
# compat_map/filter() returning an iterator, supposedly the
|
||||
# same versioning as for zip below
|
||||
try:
|
||||
from future_builtins import map as compat_map
|
||||
except ImportError:
|
||||
@@ -3460,7 +3322,6 @@ except ImportError:
|
||||
except ImportError:
|
||||
compat_filter = filter
|
||||
|
||||
# compat_zip
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
except ImportError: # not 2.6+ or is 3.x
|
||||
@@ -3470,7 +3331,6 @@ except ImportError: # not 2.6+ or is 3.x
|
||||
compat_zip = zip
|
||||
|
||||
|
||||
# compat_itertools_zip_longest
|
||||
# method renamed between Py2/3
|
||||
try:
|
||||
from itertools import zip_longest as compat_itertools_zip_longest
|
||||
@@ -3478,8 +3338,7 @@ except ImportError:
|
||||
from itertools import izip_longest as compat_itertools_zip_longest
|
||||
|
||||
|
||||
# compat_collections_chain_map
|
||||
# collections.ChainMap: new class
|
||||
# new class in collections
|
||||
try:
|
||||
from collections import ChainMap as compat_collections_chain_map
|
||||
# Py3.3's ChainMap is deficient
|
||||
@@ -3535,22 +3394,19 @@ except ImportError:
|
||||
def new_child(self, m=None, **kwargs):
|
||||
m = m or {}
|
||||
m.update(kwargs)
|
||||
# support inheritance !
|
||||
return type(self)(m, *self.maps)
|
||||
return compat_collections_chain_map(m, *self.maps)
|
||||
|
||||
@property
|
||||
def parents(self):
|
||||
return type(self)(*(self.maps[1:]))
|
||||
return compat_collections_chain_map(*(self.maps[1:]))
|
||||
|
||||
|
||||
# compat_re_Pattern, compat_re_Match
|
||||
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
|
||||
compat_re_Pattern = type(re.compile(''))
|
||||
# and on the type of a match
|
||||
compat_re_Match = type(re.match('a', 'a'))
|
||||
|
||||
|
||||
# compat_base64_b64decode
|
||||
if sys.version_info < (3, 3):
|
||||
def compat_b64decode(s, *args, **kwargs):
|
||||
if isinstance(s, compat_str):
|
||||
@@ -3562,7 +3418,6 @@ else:
|
||||
compat_base64_b64decode = compat_b64decode
|
||||
|
||||
|
||||
# compat_ctypes_WINFUNCTYPE
|
||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
||||
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
||||
# names, see the original PyPy issue [1] and the youtube-dl one [2].
|
||||
@@ -3581,7 +3436,6 @@ else:
|
||||
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||
|
||||
|
||||
# compat_open
|
||||
if sys.version_info < (3, 0):
|
||||
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
|
||||
def compat_open(file_, *args, **kwargs):
|
||||
@@ -3609,28 +3463,18 @@ except AttributeError:
|
||||
def compat_datetime_timedelta_total_seconds(td):
|
||||
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
|
||||
|
||||
|
||||
# optional decompression packages
|
||||
# compat_brotli
|
||||
# PyPi brotli package implements 'br' Content-Encoding
|
||||
try:
|
||||
import brotli as compat_brotli
|
||||
except ImportError:
|
||||
compat_brotli = None
|
||||
# compat_ncompress
|
||||
# PyPi ncompress package implements 'compress' Content-Encoding
|
||||
try:
|
||||
import ncompress as compat_ncompress
|
||||
except ImportError:
|
||||
compat_ncompress = None
|
||||
|
||||
# compat_zstandard
|
||||
# PyPi zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
|
||||
try:
|
||||
import zstandard as compat_zstandard
|
||||
except ImportError:
|
||||
compat_zstandard = None
|
||||
|
||||
|
||||
legacy = [
|
||||
'compat_HTMLParseError',
|
||||
@@ -3647,7 +3491,6 @@ legacy = [
|
||||
'compat_getpass',
|
||||
'compat_parse_qs',
|
||||
'compat_realpath',
|
||||
'compat_shlex_split',
|
||||
'compat_urllib_parse_parse_qs',
|
||||
'compat_urllib_parse_unquote',
|
||||
'compat_urllib_parse_unquote_plus',
|
||||
@@ -3661,6 +3504,8 @@ legacy = [
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_html_parser_HTMLParseError',
|
||||
'compat_html_parser_HTMLParser',
|
||||
'compat_Struct',
|
||||
'compat_base64_b64decode',
|
||||
'compat_basestring',
|
||||
@@ -3669,9 +3514,13 @@ __all__ = [
|
||||
'compat_chr',
|
||||
'compat_collections_abc',
|
||||
'compat_collections_chain_map',
|
||||
'compat_datetime_timedelta_total_seconds',
|
||||
'compat_http_cookiejar',
|
||||
'compat_http_cookiejar_Cookie',
|
||||
'compat_http_cookies',
|
||||
'compat_http_cookies_SimpleCookie',
|
||||
'compat_contextlib_suppress',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_datetime_timedelta_total_seconds',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_iterfind',
|
||||
'compat_filter',
|
||||
@@ -3680,16 +3529,9 @@ __all__ = [
|
||||
'compat_getpass_getpass',
|
||||
'compat_html_entities',
|
||||
'compat_html_entities_html5',
|
||||
'compat_html_parser_HTMLParseError',
|
||||
'compat_html_parser_HTMLParser',
|
||||
'compat_http_cookiejar',
|
||||
'compat_http_cookiejar_Cookie',
|
||||
'compat_http_cookies',
|
||||
'compat_http_cookies_SimpleCookie',
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_input',
|
||||
'compat_int',
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_itertools_zip_longest',
|
||||
@@ -3699,7 +3541,6 @@ __all__ = [
|
||||
'compat_numeric_types',
|
||||
'compat_open',
|
||||
'compat_ord',
|
||||
'compat_os_makedirs',
|
||||
'compat_os_name',
|
||||
'compat_os_path_expanduser',
|
||||
'compat_os_path_realpath',
|
||||
@@ -3709,7 +3550,7 @@ __all__ = [
|
||||
'compat_register_utf8',
|
||||
'compat_setenv',
|
||||
'compat_shlex_quote',
|
||||
'compat_shutil_get_terminal_size',
|
||||
'compat_shlex_split',
|
||||
'compat_socket_create_connection',
|
||||
'compat_str',
|
||||
'compat_struct_pack',
|
||||
@@ -3729,5 +3570,5 @@ __all__ = [
|
||||
'compat_xml_etree_register_namespace',
|
||||
'compat_xpath',
|
||||
'compat_zip',
|
||||
'compat_zstandard',
|
||||
'workaround_optparse_bug9161',
|
||||
]
|
||||
|
@@ -11,7 +11,6 @@ from ..utils import (
|
||||
decodeArgument,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
float_or_none,
|
||||
format_bytes,
|
||||
shell_quote,
|
||||
timeconvert,
|
||||
@@ -368,27 +367,14 @@ class FileDownloader(object):
|
||||
})
|
||||
return True
|
||||
|
||||
min_sleep_interval, max_sleep_interval = (
|
||||
float_or_none(self.params.get(interval), default=0)
|
||||
for interval in ('sleep_interval', 'max_sleep_interval'))
|
||||
|
||||
sleep_note = ''
|
||||
available_at = info_dict.get('available_at')
|
||||
if available_at:
|
||||
forced_sleep_interval = available_at - int(time.time())
|
||||
if forced_sleep_interval > min_sleep_interval:
|
||||
sleep_note = 'as required by the site'
|
||||
min_sleep_interval = forced_sleep_interval
|
||||
if forced_sleep_interval > max_sleep_interval:
|
||||
max_sleep_interval = forced_sleep_interval
|
||||
|
||||
sleep_interval = random.uniform(
|
||||
min_sleep_interval, max_sleep_interval or min_sleep_interval)
|
||||
|
||||
if sleep_interval > 0:
|
||||
min_sleep_interval = self.params.get('sleep_interval')
|
||||
if min_sleep_interval:
|
||||
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
|
||||
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||
self.to_screen(
|
||||
'[download] Sleeping %.2f seconds %s...' % (
|
||||
sleep_interval, sleep_note))
|
||||
'[download] Sleeping %s seconds...' % (
|
||||
int(sleep_interval) if sleep_interval.is_integer()
|
||||
else '%.2f' % sleep_interval))
|
||||
time.sleep(sleep_interval)
|
||||
|
||||
return self.real_download(filename, info_dict)
|
||||
|
@@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BokeCCIE(BokeCCBaseIE):
|
||||
IE_DESC = 'CC视频'
|
||||
_IE_DESC = 'CC视频'
|
||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CloudyIE(InfoExtractor):
|
||||
IE_DESC = 'cloudy.ec'
|
||||
_IE_DESC = 'cloudy.ec'
|
||||
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cloudy.ec/v/af511e2527aac',
|
||||
|
@@ -422,8 +422,6 @@ class InfoExtractor(object):
|
||||
_GEO_COUNTRIES = None
|
||||
_GEO_IP_BLOCKS = None
|
||||
_WORKING = True
|
||||
# supply this in public subclasses: used in supported sites list, etc
|
||||
# IE_DESC = 'short description of IE'
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
@@ -505,7 +503,7 @@ class InfoExtractor(object):
|
||||
if not self._x_forwarded_for_ip:
|
||||
|
||||
# Geo bypass mechanism is explicitly disabled by user
|
||||
if not self.get_param('geo_bypass', True):
|
||||
if not self._downloader.params.get('geo_bypass', True):
|
||||
return
|
||||
|
||||
if not geo_bypass_context:
|
||||
@@ -527,7 +525,7 @@ class InfoExtractor(object):
|
||||
|
||||
# Explicit IP block specified by user, use it right away
|
||||
# regardless of whether extractor is geo bypassable or not
|
||||
ip_block = self.get_param('geo_bypass_ip_block', None)
|
||||
ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
|
||||
|
||||
# Otherwise use random IP block from geo bypass context but only
|
||||
# if extractor is known as geo bypassable
|
||||
@@ -538,8 +536,8 @@ class InfoExtractor(object):
|
||||
|
||||
if ip_block:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
||||
if self.get_param('verbose', False):
|
||||
self.to_screen(
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen(
|
||||
'[debug] Using fake IP %s as X-Forwarded-For.'
|
||||
% self._x_forwarded_for_ip)
|
||||
return
|
||||
@@ -548,7 +546,7 @@ class InfoExtractor(object):
|
||||
|
||||
# Explicit country code specified by user, use it right away
|
||||
# regardless of whether extractor is geo bypassable or not
|
||||
country = self.get_param('geo_bypass_country', None)
|
||||
country = self._downloader.params.get('geo_bypass_country', None)
|
||||
|
||||
# Otherwise use random country code from geo bypass context but
|
||||
# only if extractor is known as geo bypassable
|
||||
@@ -559,8 +557,8 @@ class InfoExtractor(object):
|
||||
|
||||
if country:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
||||
if self.get_param('verbose', False):
|
||||
self.to_screen(
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen(
|
||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||
% (self._x_forwarded_for_ip, country.upper()))
|
||||
|
||||
@@ -586,9 +584,9 @@ class InfoExtractor(object):
|
||||
raise ExtractorError('An extractor error has occurred.', cause=e)
|
||||
|
||||
def __maybe_fake_ip_and_retry(self, countries):
|
||||
if (not self.get_param('geo_bypass_country', None)
|
||||
if (not self._downloader.params.get('geo_bypass_country', None)
|
||||
and self._GEO_BYPASS
|
||||
and self.get_param('geo_bypass', True)
|
||||
and self._downloader.params.get('geo_bypass', True)
|
||||
and not self._x_forwarded_for_ip
|
||||
and countries):
|
||||
country_code = random.choice(countries)
|
||||
@@ -698,7 +696,7 @@ class InfoExtractor(object):
|
||||
if fatal:
|
||||
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||
else:
|
||||
self.report_warning(errmsg)
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
@@ -770,11 +768,11 @@ class InfoExtractor(object):
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if not encoding:
|
||||
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
if self._downloader.params.get('dump_intermediate_pages', False):
|
||||
self.to_screen('Dumping request to ' + urlh.geturl())
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self.to_screen(dump)
|
||||
if self.get_param('write_pages', False):
|
||||
self._downloader.to_screen(dump)
|
||||
if self._downloader.params.get('write_pages', False):
|
||||
basen = '%s_%s' % (video_id, urlh.geturl())
|
||||
if len(basen) > 240:
|
||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||
@@ -976,9 +974,19 @@ class InfoExtractor(object):
|
||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||
self._downloader.to_screen(self.__ie_msg(msg))
|
||||
|
||||
def write_debug(self, msg, only_once=False):
|
||||
def write_debug(self, msg, only_once=False, _cache=[]):
|
||||
'''Log debug message or Print message to stderr'''
|
||||
self._downloader.write_debug(self.__ie_msg(msg), only_once=only_once)
|
||||
if not self.get_param('verbose', False):
|
||||
return
|
||||
message = '[debug] ' + self.__ie_msg(msg)
|
||||
logger = self.get_param('logger')
|
||||
if logger:
|
||||
logger.debug(message)
|
||||
else:
|
||||
if only_once and hash(message) in _cache:
|
||||
return
|
||||
self._downloader.to_stderr(message)
|
||||
_cache.append(hash(message))
|
||||
|
||||
# name, default=None, *args, **kwargs
|
||||
def get_param(self, name, *args, **kwargs):
|
||||
@@ -1074,7 +1082,7 @@ class InfoExtractor(object):
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||
_name = '\033[0;34m%s\033[0m' % name
|
||||
else:
|
||||
_name = name
|
||||
@@ -1092,7 +1100,7 @@ class InfoExtractor(object):
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||
else:
|
||||
self.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
return None
|
||||
|
||||
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
|
||||
@@ -1162,7 +1170,7 @@ class InfoExtractor(object):
|
||||
username = None
|
||||
password = None
|
||||
|
||||
if self.get_param('usenetrc', False):
|
||||
if self._downloader.params.get('usenetrc', False):
|
||||
try:
|
||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||
info = netrc.netrc().authenticators(netrc_machine)
|
||||
@@ -1173,7 +1181,7 @@ class InfoExtractor(object):
|
||||
raise netrc.NetrcParseError(
|
||||
'No authenticators for %s' % netrc_machine)
|
||||
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
||||
self.report_warning(
|
||||
self._downloader.report_warning(
|
||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||
|
||||
return username, password
|
||||
@@ -1210,10 +1218,10 @@ class InfoExtractor(object):
|
||||
"""
|
||||
if self._downloader is None:
|
||||
return None
|
||||
downloader_params = self._downloader.params
|
||||
|
||||
twofactor = self.get_param('twofactor')
|
||||
if twofactor is not None:
|
||||
return twofactor
|
||||
if downloader_params.get('twofactor') is not None:
|
||||
return downloader_params['twofactor']
|
||||
|
||||
return compat_getpass('Type %s and press [Return]: ' % note)
|
||||
|
||||
@@ -1348,7 +1356,7 @@ class InfoExtractor(object):
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||
else:
|
||||
self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||
return {}
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
@@ -1579,7 +1587,7 @@ class InfoExtractor(object):
|
||||
|
||||
if f.get('vcodec') == 'none': # audio only
|
||||
preference -= 50
|
||||
if self.get_param('prefer_free_formats'):
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
||||
else:
|
||||
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
|
||||
@@ -1591,7 +1599,7 @@ class InfoExtractor(object):
|
||||
else:
|
||||
if f.get('acodec') == 'none': # video only
|
||||
preference -= 40
|
||||
if self.get_param('prefer_free_formats'):
|
||||
if self._downloader.params.get('prefer_free_formats'):
|
||||
ORDER = ['flv', 'mp4', 'webm']
|
||||
else:
|
||||
ORDER = ['webm', 'flv', 'mp4']
|
||||
@@ -1657,7 +1665,7 @@ class InfoExtractor(object):
|
||||
""" Either "http:" or "https:", depending on the user's preferences """
|
||||
return (
|
||||
'http:'
|
||||
if self.get_param('prefer_insecure', False)
|
||||
if self._downloader.params.get('prefer_insecure', False)
|
||||
else 'https:')
|
||||
|
||||
def _proto_relative_url(self, url, scheme=None):
|
||||
@@ -3162,7 +3170,7 @@ class InfoExtractor(object):
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
@@ -3189,7 +3197,7 @@ class InfoExtractor(object):
|
||||
if fatal:
|
||||
raise ExtractorError(msg)
|
||||
else:
|
||||
self.report_warning(msg)
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _float(self, v, name, fatal=False, **kwargs):
|
||||
@@ -3199,7 +3207,7 @@ class InfoExtractor(object):
|
||||
if fatal:
|
||||
raise ExtractorError(msg)
|
||||
else:
|
||||
self.report_warning(msg)
|
||||
self._downloader.report_warning(msg)
|
||||
return res
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||
@@ -3208,12 +3216,12 @@ class InfoExtractor(object):
|
||||
0, name, value, port, port is not None, domain, True,
|
||||
domain.startswith('.'), path, True, secure, expire_time,
|
||||
discard, None, None, rest)
|
||||
self.cookiejar.set_cookie(cookie)
|
||||
self._downloader.cookiejar.set_cookie(cookie)
|
||||
|
||||
def _get_cookies(self, url):
|
||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||
req = sanitized_Request(url)
|
||||
self.cookiejar.add_cookie_header(req)
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
@@ -3273,8 +3281,8 @@ class InfoExtractor(object):
|
||||
return not any_restricted
|
||||
|
||||
def extract_subtitles(self, *args, **kwargs):
|
||||
if (self.get_param('writesubtitles', False)
|
||||
or self.get_param('listsubtitles')):
|
||||
if (self._downloader.params.get('writesubtitles', False)
|
||||
or self._downloader.params.get('listsubtitles')):
|
||||
return self._get_subtitles(*args, **kwargs)
|
||||
return {}
|
||||
|
||||
@@ -3295,11 +3303,7 @@ class InfoExtractor(object):
|
||||
""" Merge subtitle dictionaries, language by language. """
|
||||
|
||||
# ..., * , target=None
|
||||
target = kwargs.get('target')
|
||||
if target is None:
|
||||
target = dict(subtitle_dict1)
|
||||
else:
|
||||
subtitle_dicts = (subtitle_dict1,) + subtitle_dicts
|
||||
target = kwargs.get('target') or dict(subtitle_dict1)
|
||||
|
||||
for subtitle_dict in subtitle_dicts:
|
||||
for lang in subtitle_dict:
|
||||
@@ -3307,8 +3311,8 @@ class InfoExtractor(object):
|
||||
return target
|
||||
|
||||
def extract_automatic_captions(self, *args, **kwargs):
|
||||
if (self.get_param('writeautomaticsub', False)
|
||||
or self.get_param('listsubtitles')):
|
||||
if (self._downloader.params.get('writeautomaticsub', False)
|
||||
or self._downloader.params.get('listsubtitles')):
|
||||
return self._get_automatic_captions(*args, **kwargs)
|
||||
return {}
|
||||
|
||||
@@ -3316,9 +3320,9 @@ class InfoExtractor(object):
|
||||
raise NotImplementedError('This method must be implemented by subclasses')
|
||||
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if (self.get_param('mark_watched', False)
|
||||
if (self._downloader.params.get('mark_watched', False)
|
||||
and (self._get_login_info()[0] is not None
|
||||
or self.get_param('cookiefile') is not None)):
|
||||
or self._downloader.params.get('cookiefile') is not None)):
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
@@ -3326,7 +3330,7 @@ class InfoExtractor(object):
|
||||
|
||||
def geo_verification_headers(self):
|
||||
headers = {}
|
||||
geo_verification_proxy = self.get_param('geo_verification_proxy')
|
||||
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
|
||||
if geo_verification_proxy:
|
||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||
return headers
|
||||
|
@@ -35,6 +35,15 @@ from ..utils import (
|
||||
|
||||
class ITVBaseIE(InfoExtractor):
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||
transform_source = kw.pop('transform_source', None)
|
||||
fatal = kw.pop('fatal', True)
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
|
||||
webpage, 'next.js data', group='js', fatal=fatal, **kw),
|
||||
video_id, transform_source=transform_source, fatal=fatal)
|
||||
|
||||
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
|
||||
if errnote is False:
|
||||
return False
|
||||
@@ -100,9 +109,7 @@ class ITVBaseIE(InfoExtractor):
|
||||
|
||||
class ITVIE(ITVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
|
||||
IE_DESC = 'ITVX'
|
||||
_WORKING = False
|
||||
|
||||
_IE_DESC = 'ITVX'
|
||||
_TESTS = [{
|
||||
'note': 'Hub URLs redirect to ITVX',
|
||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||
@@ -263,7 +270,7 @@ class ITVIE(ITVBaseIE):
|
||||
'ext': determine_ext(href, 'vtt'),
|
||||
})
|
||||
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
|
||||
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
|
||||
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
|
||||
info = self._og_extract(webpage, require_title=not title)
|
||||
@@ -316,7 +323,7 @@ class ITVIE(ITVBaseIE):
|
||||
|
||||
class ITVBTCCIE(ITVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||
_TESTS = [{
|
||||
'note': 'British Touring Car Championship',
|
||||
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
|
||||
|
@@ -7,30 +7,35 @@ import operator
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
NO_DEFAULT,
|
||||
orderedSet,
|
||||
parse_count,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
remove_start,
|
||||
T,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PornHubBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'pornhub'
|
||||
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
|
||||
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
def dl(*args, **kwargs):
|
||||
@@ -47,10 +52,7 @@ class PornHubBaseIE(InfoExtractor):
|
||||
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||
r'document\.cookie\s*=\s*["\']RNKEY=',
|
||||
r'document\.location\.reload\(true\)')):
|
||||
url_or_request = args[0]
|
||||
url = (url_or_request.get_full_url()
|
||||
if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else url_or_request)
|
||||
url = urlh.geturl()
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
phantom.get(url, html=webpage)
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
@@ -60,11 +62,17 @@ class PornHubBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _set_age_cookies(self, host):
|
||||
self._set_cookie(host, 'age_verified', '1')
|
||||
self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
|
||||
self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
|
||||
self._set_cookie(host, 'accessPH', '1')
|
||||
|
||||
def _login(self, host):
|
||||
if self._logged_in:
|
||||
return
|
||||
|
||||
site = host.split('.')[0]
|
||||
site = host.split('.', 1)[0]
|
||||
|
||||
# Both sites pornhub and pornhubpremium have separate accounts
|
||||
# so there should be an option to provide credentials for both.
|
||||
@@ -81,9 +89,9 @@ class PornHubBaseIE(InfoExtractor):
|
||||
login_url, None, 'Downloading %s login page' % site)
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'class=["\']signOut',
|
||||
r'>Sign\s+[Oo]ut\s*<'))
|
||||
return bool(
|
||||
get_element_by_id('profileMenuDropdown', webpage)
|
||||
or get_element_by_class('ph-icon-logout', webpage))
|
||||
|
||||
if is_logged(login_page):
|
||||
self._logged_in = True
|
||||
@@ -92,12 +100,12 @@ class PornHubBaseIE(InfoExtractor):
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'email': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.%s/front/authenticate' % host, None,
|
||||
'https://www.%s/front/authenticate' % host, 'login',
|
||||
'Logging in to %s' % site,
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={
|
||||
@@ -119,17 +127,12 @@ class PornHubBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PornHubIE(PornHubBaseIE):
|
||||
IE_DESC = 'PornHub and Thumbzilla'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?
|
||||
%s
|
||||
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
''' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
IE_DESC = 'PornHub' # Thumbzilla -> Redtube.com, Modelhub -> uviu.com
|
||||
_PORNHUB_PATH_RE = r'/(?:(?:view_video\.php%s)\?(?:.+&)?viewkey=%s)(?P<id>[\da-z]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?%s%s' % (
|
||||
PornHubBaseIE._PORNHUB_HOST_RE, _PORNHUB_PATH_RE % ('|video/show', '|embed/'))
|
||||
_PORNHUB_PATH_RE = _PORNHUB_PATH_RE % ('', '')
|
||||
_EMBED_REGEX = [r'<iframe\s[^>]*?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
|
||||
@@ -138,6 +141,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
|
||||
'uploader': 'Babes',
|
||||
'uploader_id': '/users/babes-com',
|
||||
'upload_date': '20130628',
|
||||
'timestamp': 1372447216,
|
||||
'duration': 361,
|
||||
@@ -148,6 +152,10 @@ class PornHubIE(PornHubBaseIE):
|
||||
'age_limit': 18,
|
||||
'tags': list,
|
||||
'categories': list,
|
||||
'cast': list,
|
||||
},
|
||||
'params': {
|
||||
'format': '[format_id!^=hls]',
|
||||
},
|
||||
}, {
|
||||
# non-ASCII title
|
||||
@@ -189,14 +197,27 @@ class PornHubIE(PornHubBaseIE):
|
||||
'categories': list,
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
"ext": 'srt'
|
||||
}]
|
||||
'ext': 'srt',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video has been disabled',
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
|
||||
'info_dict': {
|
||||
'id': 'ph601dc30bae19a',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1612564932,
|
||||
'age_limit': 18,
|
||||
'uploader': 'Projekt Melody',
|
||||
'uploader_id': 'projekt-melody',
|
||||
'upload_date': '20210205',
|
||||
'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
|
||||
'thumbnail': r're:https?://.+',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
|
||||
'only_matching': True,
|
||||
@@ -216,9 +237,6 @@ class PornHubIE(PornHubBaseIE):
|
||||
# private video
|
||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||
'only_matching': True,
|
||||
@@ -244,28 +262,36 @@ class PornHubIE(PornHubBaseIE):
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
|
||||
'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
webpage)
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
def yield_urls():
|
||||
for p in cls._EMBED_REGEX:
|
||||
for from_ in re.finditer(p, webpage):
|
||||
yield from_.group('url')
|
||||
|
||||
return list(yield_urls())
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
return str_to_int(self._search_regex(
|
||||
return parse_count(self._search_regex(
|
||||
pattern, webpage, '%s count' % name, fatal=False))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
for _ in range(2):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id') if mobj else self._generic_id(url)
|
||||
_, urlh = self._download_webpage_handle(url, video_id)
|
||||
if url == urlh.geturl():
|
||||
break
|
||||
url = urlh.geturl()
|
||||
|
||||
host = mobj.group('host') or 'pornhub.com'
|
||||
video_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
|
||||
self._set_cookie(host, 'age_verified', '1')
|
||||
self._set_age_cookies(host)
|
||||
|
||||
def dl_webpage(platform):
|
||||
self._set_cookie(host, 'platform', platform)
|
||||
@@ -276,7 +302,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
(r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
(r'(?s)<div[^>]+class=("|\')(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
|
||||
webpage, 'error message', default=None, group='error')
|
||||
if error_msg:
|
||||
@@ -285,9 +311,9 @@ class PornHubIE(PornHubBaseIE):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
if any(re.search(p, webpage) for p in (
|
||||
r'class=["\']geoBlocked["\']',
|
||||
r'>\s*This content is unavailable in your country')):
|
||||
if bool(get_element_by_class('geoBlocked', webpage)
|
||||
or self._search_regex(
|
||||
r'>\s*This content is (unavailable) in your country', webpage, 'geo-restriction', default=False)):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
@@ -304,36 +330,34 @@ class PornHubIE(PornHubBaseIE):
|
||||
video_urls_set = set()
|
||||
subtitles = {}
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
video_id)
|
||||
if flashvars:
|
||||
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'srt',
|
||||
})
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
media_definitions = flashvars.get('mediaDefinitions')
|
||||
if isinstance(media_definitions, list):
|
||||
for definition in media_definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
if video_url in video_urls_set:
|
||||
continue
|
||||
video_urls_set.add(video_url)
|
||||
video_urls.append(
|
||||
(video_url, int_or_none(definition.get('quality'))))
|
||||
else:
|
||||
thumbnail, duration = [None] * 2
|
||||
def add_video_url(video_url, quality=None):
|
||||
v_url = url_or_none(video_url)
|
||||
if not v_url:
|
||||
return
|
||||
if v_url in video_urls_set:
|
||||
return
|
||||
video_urls.append((v_url, quality))
|
||||
video_urls_set.add(v_url)
|
||||
|
||||
def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
|
||||
flashvars = self._search_json(r'var\s+flashvars_\d+\s*=', webpage, 'flashvars', video_id)
|
||||
flashvars = traverse_obj(flashvars, {
|
||||
'closedCaptionsFile': ('closedCaptionsFile', T(url_or_none)),
|
||||
'image_url': ('image_url', T(url_or_none)),
|
||||
'video_duration': ('video_duration', T(int_or_none)),
|
||||
'mediaDefinitions': ('mediaDefinitions', lambda _, v: v['videoUrl']),
|
||||
}) or {}
|
||||
subtitle_url = flashvars.get('closedCaptionsFile')
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': 'srt',
|
||||
})
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = flashvars.get('video_duration')
|
||||
for definition in flashvars.get('mediaDefinitions') or []:
|
||||
add_video_url(definition['videoUrl'], int_or_none(definition.get('quality')))
|
||||
|
||||
def extract_js_vars(webpage, pattern, default=None):
|
||||
assignments = self._search_regex(
|
||||
pattern, webpage, 'encoded url', default=default)
|
||||
if not assignments:
|
||||
@@ -363,51 +387,33 @@ class PornHubIE(PornHubBaseIE):
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
return js_vars
|
||||
|
||||
def add_video_url(video_url):
|
||||
v_url = url_or_none(video_url)
|
||||
if not v_url:
|
||||
return
|
||||
if v_url in video_urls_set:
|
||||
return
|
||||
video_urls.append((v_url, None))
|
||||
video_urls_set.add(v_url)
|
||||
|
||||
def parse_quality_items(quality_items):
|
||||
q_items = self._parse_json(quality_items, video_id, fatal=False)
|
||||
if not isinstance(q_items, list):
|
||||
return
|
||||
for item in q_items:
|
||||
if isinstance(item, dict):
|
||||
add_video_url(item.get('url'))
|
||||
for v_url in traverse_obj(q_items, (Ellipsis, 'url')):
|
||||
add_video_url(v_url)
|
||||
|
||||
if not video_urls:
|
||||
FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
|
||||
js_vars = extract_js_vars(
|
||||
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
|
||||
default=None)
|
||||
if js_vars:
|
||||
for key, format_url in js_vars.items():
|
||||
if key.startswith(FORMAT_PREFIXES[-1]):
|
||||
parse_quality_items(format_url)
|
||||
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
|
||||
add_video_url(format_url)
|
||||
if not video_urls and re.search(
|
||||
r'<[^>]+\bid=["\']lockedPlayer', webpage):
|
||||
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES))
|
||||
for key, format_url in js_vars.items():
|
||||
if key.startswith(FORMAT_PREFIXES[-1]):
|
||||
parse_quality_items(format_url)
|
||||
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
|
||||
add_video_url(format_url)
|
||||
if not video_urls and get_element_by_id('lockedPlayer', webpage):
|
||||
raise ExtractorError(
|
||||
'Video %s is locked' % video_id, expected=True)
|
||||
|
||||
if not video_urls:
|
||||
js_vars = extract_js_vars(
|
||||
dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
|
||||
add_video_url(js_vars['mediastring'])
|
||||
add_video_url(traverse_obj(js_vars, 'mediastring'))
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
add_video_url(mobj.group('url'))
|
||||
|
||||
upload_date = None
|
||||
formats = []
|
||||
@@ -433,6 +439,13 @@ class PornHubIE(PornHubBaseIE):
|
||||
'height': height,
|
||||
})
|
||||
|
||||
if not video_urls:
|
||||
# import here to avoid mutually recursive dependency
|
||||
from .generic import GenericIE
|
||||
ret = GenericIE.generic_url_result(url, video_id=video_id, video_title=title, force_videoid=True)
|
||||
ret['_type'] = 'url_transparent'
|
||||
return ret
|
||||
|
||||
for video_url, height in video_urls:
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
@@ -440,52 +453,55 @@ class PornHubIE(PornHubBaseIE):
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('/', '')
|
||||
if '/video/get_media' in video_url:
|
||||
# self._set_cookie(host, 'platform', 'tv')
|
||||
medias = self._download_json(video_url, video_id, fatal=False)
|
||||
if isinstance(medias, list):
|
||||
for media in medias:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
video_url = url_or_none(media.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
height = int_or_none(media.get('quality'))
|
||||
add_format(video_url, height)
|
||||
for media in traverse_obj(medias, lambda _, v: v['videoUrl']):
|
||||
video_url = url_or_none(media['videoUrl'])
|
||||
if not video_url:
|
||||
continue
|
||||
height = int_or_none(media.get('quality'))
|
||||
add_format(video_url, height)
|
||||
continue
|
||||
add_format(video_url)
|
||||
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'fps', 'format_id'))
|
||||
|
||||
model_profile = self._search_json(
|
||||
r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', default=None)
|
||||
webpage, 'uploader', default=None) or model_profile.get('username')
|
||||
|
||||
def extract_vote_count(kind, name):
|
||||
return self._extract_count(
|
||||
(r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
|
||||
(r'<span[^>]+\bclass="votes%s"[^>]*>(\d[\d,\.]*[kKmM]?)</span>' % kind,
|
||||
r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
|
||||
webpage, name)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||
r'<span class="count">(\d[\d,\.]*[kKmM]?)</span> [Vv]iews', webpage, 'view')
|
||||
like_count = extract_vote_count('Up', 'like')
|
||||
dislike_count = extract_vote_count('Down', 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
r'All Comments\s*<span>\((\d[\d,\.]*[kKmM]?)\)', webpage, 'comment')
|
||||
|
||||
def extract_list(meta_key):
|
||||
div = self._search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
|
||||
% meta_key, webpage, meta_key, default=None)
|
||||
if div:
|
||||
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
|
||||
return [clean_html(x) for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
# description provided in JSON-LD is irrelevant
|
||||
info['description'] = None
|
||||
for k in ('url', 'description'):
|
||||
info.pop(k, None)
|
||||
|
||||
return merge_dicts({
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
|
||||
'upload_date': upload_date,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
@@ -498,8 +514,9 @@ class PornHubIE(PornHubBaseIE):
|
||||
'age_limit': 18,
|
||||
'tags': extract_list('tags'),
|
||||
'categories': extract_list('categories'),
|
||||
'cast': extract_list('pornstars'),
|
||||
'subtitles': subtitles,
|
||||
}, info)
|
||||
})
|
||||
|
||||
|
||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
@@ -512,65 +529,28 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
# drop-down menu that uses similar pattern for videos (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11594).
|
||||
container = self._search_regex(
|
||||
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
|
||||
r'(?s)(<div\s[^>]*class=["\']container.+)', webpage,
|
||||
'container', default=webpage)
|
||||
|
||||
def entries():
|
||||
seen_ids = set()
|
||||
for m in re.finditer(r'<\w+\s[^>]*(?<!-)\bhref\s*=\s*.("|\'|\b)%s\1[^>]*>' % (PornHubIE._PORNHUB_PATH_RE,), container):
|
||||
video_id = m.group('id')
|
||||
if video_id:
|
||||
if video_id in seen_ids:
|
||||
continue
|
||||
seen_ids.add(video_id)
|
||||
elt = extract_attributes(m.group(0))
|
||||
video_url = urljoin(host, elt.get('href'))
|
||||
yield video_url, video_id, elt.get('title')
|
||||
|
||||
return [
|
||||
self.url_result(
|
||||
'http://www.%s/%s' % (host, video_url),
|
||||
PornHubIE.ie_key(), video_title=title)
|
||||
for video_url, title in orderedSet(re.findall(
|
||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
||||
container))
|
||||
video_url, PornHubIE.ie_key(), video_title=title, video_id=video_id)
|
||||
for video_url, video_id, title in entries()
|
||||
]
|
||||
|
||||
|
||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious',
|
||||
'info_dict': {
|
||||
'id': 'liz-vicious',
|
||||
},
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/users/russianveet69',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/channels/povd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Unavailable via /videos page, but available with direct pagination
|
||||
# on pornstar page (see [1]), requires premium
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as before, multi page
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('id')
|
||||
videos_url = '%s/videos' % mobj.group('url')
|
||||
page = self._extract_page(url)
|
||||
if page:
|
||||
videos_url = update_url_query(videos_url, {'page': page})
|
||||
return self.url_result(
|
||||
videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
|
||||
|
||||
|
||||
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
@staticmethod
|
||||
def _has_more(webpage):
|
||||
@@ -617,23 +597,77 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
for e in page_entries:
|
||||
yield e
|
||||
for from_ in page_entries:
|
||||
yield from_
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mobj = self._match_valid_url(url)
|
||||
host = mobj.group('host')
|
||||
item_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
self._set_age_cookies(host)
|
||||
|
||||
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
||||
|
||||
|
||||
class PornHubUserIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?P<id>(?:(?:user|channel)s|model|pornstar)/[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||
'info_dict': {
|
||||
'id': 'zoe_ph',
|
||||
},
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/liz-vicious',
|
||||
'info_dict': {
|
||||
'id': 'liz-vicious',
|
||||
},
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/users/russianveet69',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/channels/povd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Unavailable via /videos page, but available with direct pagination
|
||||
# on pornstar page (see [1]), requires premium
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as before, multi page
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
user_id, host = mobj.group('id', 'host')
|
||||
videos_url = '%s/videos' % mobj.group('url')
|
||||
page = self._extract_page(url)
|
||||
if page:
|
||||
videos_url = update_url_query(videos_url, {'page': page})
|
||||
|
||||
self._login(host)
|
||||
|
||||
return self.playlist_result(self._entries(videos_url, host, user_id), user_id.split('/')[-1])
|
||||
# return self.url_result(
|
||||
# videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
|
||||
|
||||
|
||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/|gif/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
@@ -642,16 +676,20 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
|
||||
'info_dict': {
|
||||
'id': 'pornstar/jenny-blighe/videos',
|
||||
},
|
||||
'playlist_mincount': 149,
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
|
||||
'url': 'https://www.pornhub.com/pornstar/kylie-quinn/videos',
|
||||
'info_dict': {
|
||||
'id': 'pornstar/jenny-blighe/videos',
|
||||
'id': 'pornstar/kylie-quinn/videos',
|
||||
},
|
||||
'playlist_mincount': 40,
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/kylie-quinn/videos?page=2',
|
||||
'info_dict': {
|
||||
'id': 'pornstar/kylie-quinn/videos',
|
||||
},
|
||||
# specific page: process just that page
|
||||
'playlist_count': 40,
|
||||
}, {
|
||||
# default sorting as Top Rated Videos
|
||||
'url': 'https://www.pornhub.com/channels/povd/videos',
|
||||
@@ -727,27 +765,14 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/playlist/44121572',
|
||||
'info_dict': {
|
||||
'id': 'playlist/44121572',
|
||||
},
|
||||
'playlist_mincount': 132,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
|
||||
'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
|
||||
else super(PornHubPagedVideoListIE, cls).suitable(url))
|
||||
return (not any(ph.suitable(url) for ph in (PornHubIE, PornHubUserIE, PornHubUserVideosUploadIE))
|
||||
and super(PornHubPagedVideoListIE, cls).suitable(url))
|
||||
|
||||
|
||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
@@ -762,6 +787,62 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
|
||||
'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/playlist/44121572',
|
||||
'info_dict': {
|
||||
'id': '44121572',
|
||||
},
|
||||
'playlist_mincount': 55,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.pornhub.com/playlist/4667351?page=2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _entries(self, url, host, item_id):
|
||||
webpage = self._download_webpage(url, item_id, 'Downloading page 1')
|
||||
playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id')
|
||||
video_count = int_or_none(
|
||||
self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count'))
|
||||
token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token')
|
||||
page_count = (video_count - 36 + 39) // 40 + 1
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
|
||||
def download_page(page_num):
|
||||
note = 'Downloading page {0}'.format(page_num)
|
||||
page_url = 'https://www.{0}/playlist/viewChunked'.format(host)
|
||||
return self._download_webpage(page_url, item_id, note, query={
|
||||
'id': playlist_id,
|
||||
'page': page_num,
|
||||
'token': token,
|
||||
})
|
||||
|
||||
for page_num in range(1, page_count + 1):
|
||||
if page_num > 1:
|
||||
webpage = download_page(page_num)
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
for from_ in page_entries:
|
||||
yield from_
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
host, item_id = mobj.group('host', 'id')
|
||||
|
||||
self._login(host)
|
||||
self._set_age_cookies(host)
|
||||
|
||||
return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
|
||||
|
@@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
|
||||
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
|
||||
['arch', '', 'http://ussenate-f.akamaihd.net/']
|
||||
]
|
||||
IE_NAME = 'senate.gov'
|
||||
_IE_NAME = 'senate.gov'
|
||||
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -404,10 +404,6 @@ def parseOpts(overrideArguments=None):
|
||||
'-F', '--list-formats',
|
||||
action='store_true', dest='listformats',
|
||||
help='List all available formats of requested videos')
|
||||
video_format.add_option(
|
||||
'--no-list-formats',
|
||||
action='store_false', dest='listformats',
|
||||
help='Do not list available formats of requested videos (default)')
|
||||
video_format.add_option(
|
||||
'--youtube-include-dash-manifest',
|
||||
action='store_true', dest='youtube_include_dash_manifest', default=True,
|
||||
@@ -416,17 +412,6 @@ def parseOpts(overrideArguments=None):
|
||||
'--youtube-skip-dash-manifest',
|
||||
action='store_false', dest='youtube_include_dash_manifest',
|
||||
help='Do not download the DASH manifests and related data on YouTube videos')
|
||||
video_format.add_option(
|
||||
'--youtube-player-js-variant',
|
||||
action='store', dest='youtube_player_js_variant',
|
||||
help='For YouTube, the player javascript variant to use for n/sig deciphering; `actual` to follow the site; default `%default`.',
|
||||
choices=('actual', 'main', 'tcc', 'tce', 'es5', 'es6', 'tv', 'tv_es6', 'phone', 'tablet'),
|
||||
default='main', metavar='VARIANT')
|
||||
video_format.add_option(
|
||||
'--youtube-player-js-version',
|
||||
action='store', dest='youtube_player_js_version',
|
||||
help='For YouTube, the player javascript version to use for n/sig deciphering, specified as `signature_timestamp@hash`, or `actual` to follow the site; default `%default`',
|
||||
default='20348@0004de42', metavar='STS@HASH')
|
||||
video_format.add_option(
|
||||
'--merge-output-format',
|
||||
action='store', dest='merge_output_format', metavar='FORMAT', default=None,
|
||||
|
@@ -4204,16 +4204,12 @@ def lowercase_escape(s):
|
||||
s)
|
||||
|
||||
|
||||
def escape_rfc3986(s, safe=None):
|
||||
def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0):
|
||||
s = _encode_compat_str(s, 'utf-8')
|
||||
if safe is not None:
|
||||
safe = _encode_compat_str(safe, 'utf-8')
|
||||
if safe is None:
|
||||
safe = b"%/;:@&=+$,!~*'()?#[]"
|
||||
# ensure unicode: after quoting, it can always be converted
|
||||
return compat_str(compat_urllib_parse.quote(s, safe))
|
||||
return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
|
||||
|
||||
|
||||
def escape_url(url):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2025.04.07'
|
||||
__version__ = '2021.12.17'
|
||||
|
Reference in New Issue
Block a user