Compare commits

..

5 Commits

Author SHA1 Message Date
dirkf
e5bfed6c77 [PornHub] Remove extraneous modelhub login code 2024-10-15 21:11:28 +01:00
dirkf
38fce984f4 [PornHub] Fix typo in path regex 2024-10-14 14:23:46 +01:00
dirkf
49093c09c0 Merge pull request #32950 from ytdl-org/master
Merge from master
2024-10-14 14:09:51 +01:00
dirkf
f3cf092584 Integrate changes from yt-dlp and PR 31432
Thx:
* MrBigDig <mrbigdig2020@gmail.com>
* yt-dlp contributors.

Supersedes, closes #31432.
2024-10-14 12:39:50 +01:00
DarkFighterLuke
34e1010545 Fix view_count 2022-03-10 15:07:24 +01:00
21 changed files with 936 additions and 2364 deletions

View File

@@ -116,29 +116,29 @@ jobs:
strategy:
fail-fast: true
matrix:
os: [ubuntu-22.04]
os: [ubuntu-20.04]
python-version: ${{ fromJSON(needs.select.outputs.cpython-versions) }}
python-impl: [cpython]
ytdl-test-set: ${{ fromJSON(needs.select.outputs.test-set) }}
run-tests-ext: [sh]
include:
- os: windows-2022
- os: windows-2019
python-version: 3.4
python-impl: cpython
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
run-tests-ext: bat
- os: windows-2022
- os: windows-2019
python-version: 3.4
python-impl: cpython
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
run-tests-ext: bat
# jython
- os: ubuntu-22.04
- os: ubuntu-20.04
python-version: 2.7
python-impl: jython
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'core') && 'core' || 'nocore' }}
run-tests-ext: sh
- os: ubuntu-22.04
- os: ubuntu-20.04
python-version: 2.7
python-impl: jython
ytdl-test-set: ${{ contains(needs.select.outputs.test-set, 'download') && 'download' || 'nodownload' }}
@@ -160,7 +160,7 @@ jobs:
# NB may run apt-get install in Linux
uses: ytdl-org/setup-python@v1
env:
# Temporary (?) workaround for Python 3.5 failures - May 2024
# Temporary workaround for Python 3.5 failures - May 2024
PIP_TRUSTED_HOST: "pypi.python.org pypi.org files.pythonhosted.org"
with:
python-version: ${{ matrix.python-version }}
@@ -240,10 +240,7 @@ jobs:
# install 2.7
shell: bash
run: |
# Ubuntu 22.04 no longer has python-is-python2: fetch it
curl -L "http://launchpadlibrarian.net/474693132/python-is-python2_2.7.17-4_all.deb" -o python-is-python2.deb
sudo apt-get install -y python2
sudo dpkg --force-breaks -i python-is-python2.deb
sudo apt-get install -y python2 python-is-python2
echo "PYTHONHOME=/usr" >> "$GITHUB_ENV"
#-------- Python 2.6 --
- name: Set up Python 2.6 environment

View File

@@ -63,21 +63,9 @@ class TestCache(unittest.TestCase):
obj = {'x': 1, 'y': ['ä', '\\a', True]}
c.store('test_cache', 'k.', obj)
self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
new_version = '.'.join(('%0.2d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
def test_cache_clear(self):
ydl = FakeYDL({
'cachedir': self.test_dir,
})
c = Cache(ydl)
c.store('test_cache', 'k.', 'kay')
c.store('test_cache', 'l.', 'ell')
self.assertEqual(c.load('test_cache', 'k.'), 'kay')
c.clear('test_cache', 'k.')
self.assertEqual(c.load('test_cache', 'k.'), None)
self.assertEqual(c.load('test_cache', 'l.'), 'ell')
if __name__ == '__main__':
unittest.main()

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
@@ -7,14 +6,12 @@ from __future__ import unicode_literals
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import math
import re
import time
from youtube_dl.compat import compat_str as str
from youtube_dl.compat import compat_str
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
NaN = object()
@@ -22,7 +19,7 @@ NaN = object()
class TestJSInterpreter(unittest.TestCase):
def _test(self, jsi_or_code, expected, func='f', args=()):
if isinstance(jsi_or_code, str):
if isinstance(jsi_or_code, compat_str):
jsi_or_code = JSInterpreter(jsi_or_code)
got = jsi_or_code.call_function(func, *args)
if expected is NaN:
@@ -43,27 +40,16 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return 42 + 7;}', 49)
self._test('function f(){return 42 + undefined;}', NaN)
self._test('function f(){return 42 + null;}', 42)
self._test('function f(){return 1 + "";}', '1')
self._test('function f(){return 42 + "7";}', '427')
self._test('function f(){return false + true;}', 1)
self._test('function f(){return "false" + true;}', 'falsetrue')
self._test('function f(){return '
'1 + "2" + [3,4] + {k: 56} + null + undefined + Infinity;}',
'123,4[object Object]nullundefinedInfinity')
def test_sub(self):
self._test('function f(){return 42 - 7;}', 35)
self._test('function f(){return 42 - undefined;}', NaN)
self._test('function f(){return 42 - null;}', 42)
self._test('function f(){return 42 - "7";}', 35)
self._test('function f(){return 42 - "spam";}', NaN)
def test_mul(self):
self._test('function f(){return 42 * 7;}', 294)
self._test('function f(){return 42 * undefined;}', NaN)
self._test('function f(){return 42 * null;}', 0)
self._test('function f(){return 42 * "7";}', 294)
self._test('function f(){return 42 * "eggs";}', NaN)
def test_div(self):
jsi = JSInterpreter('function f(a, b){return a / b;}')
@@ -71,26 +57,17 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, NaN, args=(JS_Undefined, 1))
self._test(jsi, float('inf'), args=(2, 0))
self._test(jsi, 0, args=(0, 3))
self._test(jsi, 6, args=(42, 7))
self._test(jsi, 0, args=(42, float('inf')))
self._test(jsi, 6, args=("42", 7))
self._test(jsi, NaN, args=("spam", 7))
def test_mod(self):
self._test('function f(){return 42 % 7;}', 0)
self._test('function f(){return 42 % 0;}', NaN)
self._test('function f(){return 42 % undefined;}', NaN)
self._test('function f(){return 42 % "7";}', 0)
self._test('function f(){return 42 % "beans";}', NaN)
def test_exp(self):
self._test('function f(){return 42 ** 2;}', 1764)
self._test('function f(){return 42 ** undefined;}', NaN)
self._test('function f(){return 42 ** null;}', 1)
self._test('function f(){return undefined ** 0;}', 1)
self._test('function f(){return undefined ** 42;}', NaN)
self._test('function f(){return 42 ** "2";}', 1764)
self._test('function f(){return 42 ** "spam";}', NaN)
def test_calc(self):
self._test('function f(a){return 2*a+1;}', 7, args=[3])
@@ -112,60 +89,13 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return 19 & 21;}', 17)
self._test('function f(){return 11 >> 2;}', 2)
self._test('function f(){return []? 2+3: 4;}', 5)
# equality
self._test('function f(){return 1 == 1}', True)
self._test('function f(){return 1 == 1.0}', True)
self._test('function f(){return 1 == "1"}', True)
self._test('function f(){return 1 == 2}', False)
self._test('function f(){return 1 != "1"}', False)
self._test('function f(){return 1 != 2}', True)
self._test('function f(){var x = {a: 1}; var y = x; return x == y}', True)
self._test('function f(){var x = {a: 1}; return x == {a: 1}}', False)
self._test('function f(){return NaN == NaN}', False)
self._test('function f(){return null == undefined}', True)
self._test('function f(){return "spam, eggs" == "spam, eggs"}', True)
# strict equality
self._test('function f(){return 1 === 1}', True)
self._test('function f(){return 1 === 1.0}', True)
self._test('function f(){return 1 === "1"}', False)
self._test('function f(){return 1 === 2}', False)
self._test('function f(){var x = {a: 1}; var y = x; return x === y}', True)
self._test('function f(){var x = {a: 1}; return x === {a: 1}}', False)
self._test('function f(){return NaN === NaN}', False)
self._test('function f(){return null === undefined}', False)
self._test('function f(){return null === null}', True)
self._test('function f(){return undefined === undefined}', True)
self._test('function f(){return "uninterned" === "uninterned"}', True)
self._test('function f(){return 1 === 1}', True)
self._test('function f(){return 1 === "1"}', False)
self._test('function f(){return 1 !== 1}', False)
self._test('function f(){return 1 !== "1"}', True)
# expressions
self._test('function f(){return 0 && 1 || 2;}', 2)
self._test('function f(){return 0 ?? 42;}', 0)
self._test('function f(){return "life, the universe and everything" < 42;}', False)
# https://github.com/ytdl-org/youtube-dl/issues/32815
self._test('function f(){return 0 - 7 * - 6;}', 42)
def test_bitwise_operators_typecast(self):
# madness
self._test('function f(){return null << 5}', 0)
self._test('function f(){return undefined >> 5}', 0)
self._test('function f(){return 42 << NaN}', 42)
self._test('function f(){return 42 << Infinity}', 42)
self._test('function f(){return 0.0 << null}', 0)
self._test('function f(){return NaN << 42}', 0)
self._test('function f(){return "21.9" << 1}', 42)
self._test('function f(){return true << "5";}', 32)
self._test('function f(){return true << true;}', 2)
self._test('function f(){return "19" & "21.9";}', 17)
self._test('function f(){return "19" & false;}', 0)
self._test('function f(){return "11.0" >> "2.1";}', 2)
self._test('function f(){return 5 ^ 9;}', 12)
self._test('function f(){return 0.0 << NaN}', 0)
self._test('function f(){return null << undefined}', 0)
self._test('function f(){return 21 << 4294967297}', 42)
def test_array_access(self):
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
@@ -180,8 +110,8 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
self._test('function f(){var x = 2; var y = ["a", "b"]; y[x%y["length"]]="z"; return y}', ['z', 'b'])
@unittest.skip('Not yet fully implemented')
def test_comments(self):
self._test('''
function f() {
@@ -200,15 +130,6 @@ class TestJSInterpreter(unittest.TestCase):
}
''', 3)
self._test('''
function f() {
var x = ( /* 1 + */ 2 +
/* 30 * 40 */
50);
return x;
}
''', 52)
def test_precedence(self):
self._test('''
function f() {
@@ -230,34 +151,6 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
# epoch 0
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
# undefined
self._test(jsi, NaN, args=[JS_Undefined])
# y,m,d, ... - may fail with older dates lacking DST data
jsi = JSInterpreter(
'function f() { return new Date(%s); }'
% ('2024, 5, 29, 2, 52, 12, 42',))
self._test(jsi, (
1719625932042 # UK value
+ (
+ 3600 # back to GMT
+ (time.altzone if time.daylight # host's DST
else time.timezone)
) * 1000))
# no arg
self.assertAlmostEqual(JSInterpreter(
'function f() { return new Date() - 0; }').call_function('f'),
time.time() * 1000, delta=100)
# Date.now()
self.assertAlmostEqual(JSInterpreter(
'function f() { return Date.now(); }').call_function('f'),
time.time() * 1000, delta=100)
# Date.parse()
jsi = JSInterpreter('function f(dt) { return Date.parse(dt); }')
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
# Date.UTC()
jsi = JSInterpreter('function f() { return Date.UTC(%s); }'
% ('1970, 0, 1, 0, 0, 0, 0',))
self._test(jsi, 0)
def test_call(self):
jsi = JSInterpreter('''
@@ -372,28 +265,8 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
def test_not(self):
self._test('function f() { return ! undefined; }', True)
self._test('function f() { return !0; }', True)
self._test('function f() { return !!0; }', False)
self._test('function f() { return ![]; }', False)
self._test('function f() { return !0 !== false; }', True)
def test_void(self):
self._test('function f() { return void 42; }', JS_Undefined)
def test_typeof(self):
self._test('function f() { return typeof undefined; }', 'undefined')
self._test('function f() { return typeof NaN; }', 'number')
self._test('function f() { return typeof Infinity; }', 'number')
self._test('function f() { return typeof true; }', 'boolean')
self._test('function f() { return typeof null; }', 'object')
self._test('function f() { return typeof "a string"; }', 'string')
self._test('function f() { return typeof 42; }', 'number')
self._test('function f() { return typeof 42.42; }', 'number')
self._test('function f() { var g = function(){}; return typeof g; }', 'function')
self._test('function f() { return typeof {key: "value"}; }', 'object')
# not yet implemented: Symbol, BigInt
self._test('function f() { return void 42; }', None)
def test_return_function(self):
jsi = JSInterpreter('''
@@ -410,7 +283,7 @@ class TestJSInterpreter(unittest.TestCase):
def test_undefined(self):
self._test('function f() { return undefined === undefined; }', True)
self._test('function f() { return undefined; }', JS_Undefined)
self._test('function f() { return undefined ?? 42; }', 42)
self._test('function f() {return undefined ?? 42; }', 42)
self._test('function f() { let v; return v; }', JS_Undefined)
self._test('function f() { let v; return v**0; }', 1)
self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }',
@@ -451,19 +324,8 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f() { let a; return a?.qq; }', JS_Undefined)
self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined)
def test_indexing(self):
self._test('function f() { return [1, 2, 3, 4][3]}', 4)
self._test('function f() { return [1, [2, [3, [4]]]][1][1][1][0]}', 4)
self._test('function f() { var o = {1: 2, 3: 4}; return o[3]}', 4)
self._test('function f() { var o = {1: 2, 3: 4}; return o["3"]}', 4)
self._test('function f() { return [1, [2, {3: [4]}]][1][1]["3"][0]}', 4)
self._test('function f() { return [1, 2, 3, 4].length}', 4)
self._test('function f() { var o = {1: 2, 3: 4}; return o.length}', JS_Undefined)
self._test('function f() { var o = {1: 2, 3: 4}; o["length"] = 42; return o.length}', 42)
def test_regex(self):
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
self._test('function f() { let a=/,,[/,913,/](,)}/; return a.source; }', ',,[/,913,/](,)}')
jsi = JSInterpreter('''
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
@@ -511,6 +373,13 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return -524999584 << 5}', 379882496)
self._test('function f(){return 1236566549 << 5}', 915423904)
def test_bitwise_operators_typecast(self):
# madness
self._test('function f(){return null << 5}', 0)
self._test('function f(){return undefined >> 5}', 0)
self._test('function f(){return 42 << NaN}', 42)
self._test('function f(){return 42 << Infinity}', 42)
def test_negative(self):
self._test('function f(){return 2 * -2.0 ;}', -4)
self._test('function f(){return 2 - - -2 ;}', 0)
@@ -542,19 +411,10 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
self._test(jsi, '', args=[[], '-'])
self._test('function f(){return '
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join()}',
'1,1,abc,[object Object],,,Infinity,NaN')
self._test('function f(){return '
'[1, 1.0, "abc", {a: 1}, null, undefined, Infinity, NaN].join("~")}',
'1~1~abc~[object Object]~~~Infinity~NaN')
def test_split(self):
test_result = list('test')
tests = [
'function f(a, b){return a.split(b)}',
'function f(a, b){return a["split"](b)}',
'function f(a, b){let x = ["split"]; return a[x[0]](b)}',
'function f(a, b){return String.prototype.split.call(a, b)}',
'function f(a, b){return String.prototype.split.apply(a, [b])}',
]
@@ -564,18 +424,6 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, test_result, args=['t-e-s-t', '-'])
self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', ''])
# RegExp split
self._test('function f(){return "test".split(/(?:)/)}',
['t', 'e', 's', 't'])
self._test('function f(){return "t-e-s-t".split(/[es-]+/)}',
['t', 't'])
# from MDN: surrogate pairs aren't handled: case 1 fails
# self._test('function f(){return "😄😄".split(/(?:)/)}',
# ['\ud83d', '\ude04', '\ud83d', '\ude04'])
# case 2 beats Py3.2: it gets the case 1 result
if sys.version_info >= (2, 6) and not ((3, 0) <= sys.version_info < (3, 3)):
self._test('function f(){return "😄😄".split(/(?:)/u)}',
['😄', '😄'])
def test_slice(self):
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
@@ -605,53 +453,6 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
def test_splice(self):
self._test('function f(){var T = ["0", "1", "2"]; T["splice"](2, 1, "0")[0]; return T }', ['0', '1', '0'])
def test_pop(self):
# pop
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
[8, [0, 1, 2, 3, 4, 5, 6, 7]])
self._test('function f(){return [].pop()}', JS_Undefined)
# push
self._test('function f(){var a = [0, 1, 2]; return [a.push(3, 4), a]}',
[5, [0, 1, 2, 3, 4]])
self._test('function f(){var a = [0, 1, 2]; return [a.push(), a]}',
[3, [0, 1, 2]])
def test_shift(self):
# shift
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.shift(), a]}',
[0, [1, 2, 3, 4, 5, 6, 7, 8]])
self._test('function f(){return [].shift()}', JS_Undefined)
# unshift
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(3, 4), a]}',
[5, [3, 4, 0, 1, 2]])
self._test('function f(){var a = [0, 1, 2]; return [a.unshift(), a]}',
[3, [0, 1, 2]])
def test_forEach(self):
self._test('function f(){var ret = []; var l = [4, 2]; '
'var log = function(e,i,a){ret.push([e,i,a]);}; '
'l.forEach(log); '
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
[2, 4, 1, [4, 2]])
self._test('function f(){var ret = []; var l = [4, 2]; '
'var log = function(e,i,a){this.push([e,i,a]);}; '
'l.forEach(log, ret); '
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
[2, 4, 1, [4, 2]])
def test_extract_function(self):
jsi = JSInterpreter('function a(b) { return b + 1; }')
func = jsi.extract_function('a')
self.assertEqual(func([2]), 3)
def test_extract_function_with_global_stack(self):
jsi = JSInterpreter('function c(d) { return d + e + f + g; }')
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
self.assertEqual(func([1]), 1111)
if __name__ == '__main__':
unittest.main()

View File

@@ -1,5 +1,4 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
@@ -13,7 +12,6 @@ import re
import string
from youtube_dl.compat import (
compat_contextlib_suppress,
compat_open as open,
compat_str,
compat_urlretrieve,
@@ -52,93 +50,23 @@ _SIG_TESTS = [
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
84,
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>',
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
83,
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F',
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B',
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
),
(
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
),
(
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AAOAOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7vgpDL0QwbdV06sCIEzpWqMGkFR20CFOS21Tp-7vj_EMu-m37KtXJoOy1',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
),
(
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
),
(
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
),
)
]
_NSIG_TESTS = [
@@ -208,16 +136,12 @@ _NSIG_TESTS = [
),
(
'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA',
'-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
),
(
'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
),
(
'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js',
'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w',
),
(
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
@@ -228,11 +152,7 @@ _NSIG_TESTS = [
),
(
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
),
(
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
),
(
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
@@ -262,94 +182,6 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
),
(
'https://www.youtube.com/s/player/f8f53e1a/player_ias.vflset/en_US/base.js',
'VTQOUOv0mCIeJ7i8kZB', 'kcfD8wy0sNLyNQ',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
),
(
'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
),
(
'https://www.youtube.com/s/player/f6e09c70/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
(
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
'eWYu5d5YeY_4LyEDc', 'XJQqf-N7Xra3gg',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
(
'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
),
(
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
),
(
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
(
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
(
'https://www.youtube.com/s/player/aa3fc80b/player_ias.vflset/en_US/base.js',
'0qY9dal2uzOnOGwa-48hha', 'VSh1KDfQMk-eag',
),
]
@@ -362,8 +194,6 @@ class TestPlayerInfo(unittest.TestCase):
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
('https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js', 'e7567ecf'),
('https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js', '643afba4'),
# obsolete
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
@@ -373,9 +203,8 @@ class TestPlayerInfo(unittest.TestCase):
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
)
ie = YoutubeIE(FakeYDL({'cachedir': False}))
for player_url, expected_player_id in PLAYER_URLS:
player_id = ie._extract_player_info(player_url)
player_id = YoutubeIE._extract_player_info(player_url)
self.assertEqual(player_id, expected_player_id)
@@ -387,19 +216,21 @@ class TestSignature(unittest.TestCase):
os.mkdir(self.TESTDATA_DIR)
def tearDown(self):
with compat_contextlib_suppress(OSError):
try:
for f in os.listdir(self.TESTDATA_DIR):
os.remove(f)
except OSError:
pass
def t_factory(name, sig_func, url_pattern):
def make_tfunc(url, sig_input, expected_sig):
m = url_pattern.match(url)
assert m, '{0!r} should follow URL format'.format(url)
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
assert m, '%r should follow URL format' % url
test_id = m.group('id')
def test_func(self):
basename = 'player-{0}.js'.format(test_id)
basename = 'player-{0}-{1}.js'.format(name, test_id)
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
@@ -414,7 +245,7 @@ def t_factory(name, sig_func, url_pattern):
def signature(jscode, sig_input):
func = YoutubeIE(FakeYDL({'cachedir': False}))._parse_sig_js(jscode)
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
src_sig = (
compat_str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
@@ -422,23 +253,17 @@ def signature(jscode, sig_input):
def n_sig(jscode, sig_input):
ie = YoutubeIE(FakeYDL({'cachedir': False}))
jsi = JSInterpreter(jscode)
jsi, _, func_code = ie._extract_n_function_code_jsi(sig_input, jsi)
return ie._extract_n_function_from_code(jsi, func_code)(sig_input)
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
return JSInterpreter(jscode).call_function(funcname, sig_input)
make_sig_test = t_factory(
'signature', signature,
re.compile(r'''(?x)
.+/(?P<h5>html5)?player(?(h5)(?:-en_US)?-|/)(?P<id>[a-zA-Z0-9/._-]+)
(?(h5)/(?:watch_as3|html5player))?\.js$
'''))
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
for test_spec in _SIG_TESTS:
make_sig_test(*test_spec)
make_nsig_test = t_factory(
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_/.-]+)\.js$'))
'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
for test_spec in _NSIG_TESTS:
make_nsig_test(*test_spec)

View File

@@ -540,14 +540,10 @@ class YoutubeDL(object):
"""Print message to stdout if not in quiet mode."""
return self.to_stdout(message, skip_eol, check_quiet=True)
def _write_string(self, s, out=None, only_once=False, _cache=set()):
if only_once and s in _cache:
return
def _write_string(self, s, out=None):
write_string(s, out=out, encoding=self.params.get('encoding'))
if only_once:
_cache.add(s)
def to_stdout(self, message, skip_eol=False, check_quiet=False, only_once=False):
def to_stdout(self, message, skip_eol=False, check_quiet=False):
"""Print message to stdout if not in quiet mode."""
if self.params.get('logger'):
self.params['logger'].debug(message)
@@ -556,9 +552,9 @@ class YoutubeDL(object):
terminator = ['\n', ''][skip_eol]
output = message + terminator
self._write_string(output, self._screen_file, only_once=only_once)
self._write_string(output, self._screen_file)
def to_stderr(self, message, only_once=False):
def to_stderr(self, message):
"""Print message to stderr."""
assert isinstance(message, compat_str)
if self.params.get('logger'):
@@ -566,7 +562,7 @@ class YoutubeDL(object):
else:
message = self._bidi_workaround(message)
output = message + '\n'
self._write_string(output, self._err_file, only_once=only_once)
self._write_string(output, self._err_file)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -645,11 +641,18 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info)
self._download_retcode = 1
def report_warning(self, message, only_once=False):
def report_warning(self, message, only_once=False, _cache={}):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
if only_once:
m_hash = hash((self, message))
m_cnt = _cache.setdefault(m_hash, 0)
_cache[m_hash] = m_cnt + 1
if m_cnt > 0:
return
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
@@ -660,7 +663,7 @@ class YoutubeDL(object):
else:
_msg_header = 'WARNING:'
warning_message = '%s %s' % (_msg_header, message)
self.to_stderr(warning_message, only_once=only_once)
self.to_stderr(warning_message)
def report_error(self, message, *args, **kwargs):
'''
@@ -674,16 +677,6 @@ class YoutubeDL(object):
kwargs['message'] = '%s %s' % (_msg_header, message)
self.trouble(*args, **kwargs)
def write_debug(self, message, only_once=False):
'''Log debug message or Print message to stderr'''
if not self.params.get('verbose', False):
return
message = '[debug] {0}'.format(message)
if self.params.get('logger'):
self.params['logger'].debug(message)
else:
self.to_stderr(message, only_once)
def report_unscoped_cookies(self, *args, **kwargs):
# message=None, tb=False, is_error=False
if len(args) <= 2:
@@ -2521,7 +2514,7 @@ class YoutubeDL(object):
self.get_encoding()))
write_string(encoding_str, encoding=None)
writeln_debug = lambda *s: self.write_debug(''.join(s))
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
writeln_debug('youtube-dl version ', __version__)
if _LAZY_LOADER:
writeln_debug('Lazy loading extractors enabled')

View File

@@ -18,7 +18,7 @@ from .compat import (
compat_getpass,
compat_register_utf8,
compat_shlex_split,
_workaround_optparse_bug9161,
workaround_optparse_bug9161,
)
from .utils import (
_UnsafeExtensionError,
@@ -50,7 +50,7 @@ def _real_main(argv=None):
# Compatibility fix for Windows
compat_register_utf8()
_workaround_optparse_bug9161()
workaround_optparse_bug9161()
setproctitle('youtube-dl')
@@ -409,8 +409,6 @@ def _real_main(argv=None):
'include_ads': opts.include_ads,
'default_search': opts.default_search,
'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
'youtube_player_js_version': opts.youtube_player_js_version,
'youtube_player_js_variant': opts.youtube_player_js_variant,
'encoding': opts.encoding,
'extract_flat': opts.extract_flat,
'mark_watched': opts.mark_watched,

View File

@@ -1,4 +1,3 @@
# coding: utf-8
from __future__ import unicode_literals
import errno
@@ -11,14 +10,12 @@ import traceback
from .compat import (
compat_getenv,
compat_open as open,
compat_os_makedirs,
)
from .utils import (
error_to_compat_str,
escape_rfc3986,
expand_path,
is_outdated_version,
traverse_obj,
try_get,
write_json_file,
)
from .version import __version__
@@ -33,35 +30,23 @@ class Cache(object):
def __init__(self, ydl):
self._ydl = ydl
def _write_debug(self, *args, **kwargs):
self._ydl.write_debug(*args, **kwargs)
def _report_warning(self, *args, **kwargs):
self._ydl.report_warning(*args, **kwargs)
def _to_screen(self, *args, **kwargs):
self._ydl.to_screen(*args, **kwargs)
def _get_param(self, k, default=None):
return self._ydl.params.get(k, default)
def _get_root_dir(self):
res = self._get_param('cachedir')
res = self._ydl.params.get('cachedir')
if res is None:
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, self._YTDL_DIR)
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[\w.-]+$', section), \
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
'invalid section %r' % section
key = escape_rfc3986(key, safe='').replace('%', ',') # encode non-ascii characters
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
return os.path.join(
self._get_root_dir(), section, '%s.%s' % (key, dtype))
@property
def enabled(self):
return self._get_param('cachedir') is not False
return self._ydl.params.get('cachedir') is not False
def store(self, section, key, data, dtype='json'):
assert dtype in ('json',)
@@ -71,75 +56,61 @@ class Cache(object):
fn = self._get_cache_fn(section, key, dtype)
try:
compat_os_makedirs(os.path.dirname(fn), exist_ok=True)
self._write_debug('Saving {section}.{key} to cache'.format(section=section, key=key))
try:
os.makedirs(os.path.dirname(fn))
except OSError as ose:
if ose.errno != errno.EEXIST:
raise
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
except Exception:
tb = traceback.format_exc()
self._report_warning('Writing cache to {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
def clear(self, section, key, dtype='json'):
if not self.enabled:
return
fn = self._get_cache_fn(section, key, dtype)
self._write_debug('Clearing {section}.{key} from cache'.format(section=section, key=key))
try:
os.remove(fn)
except Exception as e:
if getattr(e, 'errno') == errno.ENOENT:
# file not found
return
tb = traceback.format_exc()
self._report_warning('Clearing cache from {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
self._ydl.report_warning(
'Writing cache to %r failed: %s' % (fn, tb))
def _validate(self, data, min_ver):
version = traverse_obj(data, self._VERSION_KEY)
version = try_get(data, lambda x: x[self._VERSION_KEY])
if not version: # Backward compatibility
data, version = {'data': data}, self._DEFAULT_VERSION
if not is_outdated_version(version, min_ver or '0', assume_new=False):
return data['data']
self._write_debug('Discarding old cache from version {version} (needs {min_ver})'.format(version=version, min_ver=min_ver))
self._ydl.to_screen(
'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
def load(self, section, key, dtype='json', default=None, **kw_min_ver):
def load(self, section, key, dtype='json', default=None, min_ver=None):
assert dtype in ('json',)
min_ver = kw_min_ver.get('min_ver')
if not self.enabled:
return default
cache_fn = self._get_cache_fn(section, key, dtype)
try:
with open(cache_fn, encoding='utf-8') as cachef:
self._write_debug('Loading {section}.{key} from cache'.format(section=section, key=key), only_once=True)
return self._validate(json.load(cachef), min_ver)
except (ValueError, KeyError):
try:
file_size = 'size: %d' % os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = error_to_compat_str(oe)
self._report_warning('Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except Exception as e:
if getattr(e, 'errno') == errno.ENOENT:
# no cache available
return
self._report_warning('Cache retrieval from %s failed' % (cache_fn,))
with open(cache_fn, 'r', encoding='utf-8') as cachef:
return self._validate(json.load(cachef), min_ver)
except ValueError:
try:
file_size = os.path.getsize(cache_fn)
except (OSError, IOError) as oe:
file_size = error_to_compat_str(oe)
self._ydl.report_warning(
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
except IOError:
pass # No cache available
return default
def remove(self):
if not self.enabled:
self._to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
return
cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % (cachedir,))
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
self._to_screen(
'Removing cache dir %s .' % (cachedir,), skip_eol=True, ),
self._ydl.to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True)
if os.path.exists(cachedir):
self._to_screen('.', skip_eol=True)
self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir)
self._to_screen('.')
self._ydl.to_screen('.')

View File

@@ -10,10 +10,9 @@ from .compat import (
# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt
# In case newly foldable Unicode characters are defined, paste the new version
# of the text inside the ''' marks.
# The text is expected to have only blank lines and lines with 1st character #,
# The text is expected to have only blank lines and lines with 1st character #,
# all ignored, and fold definitions like this:
# `from_hex_code; status; space_separated_to_hex_code_list; comment`
# Only `status` C/F are used.
# `from_hex_code; space_separated_to_hex_code_list; comment`
_map_str = '''
# CaseFolding-15.0.0.txt
@@ -1658,6 +1657,11 @@ _map = dict(
del _map_str
def _casefold(s):
def casefold(s):
assert isinstance(s, compat_str)
return ''.join((_map.get(c, c) for c in s))
__all__ = [
'casefold',
]

View File

@@ -16,6 +16,7 @@ import os
import platform
import re
import shlex
import shutil
import socket
import struct
import subprocess
@@ -23,15 +24,11 @@ import sys
import types
import xml.etree.ElementTree
_IDENTITY = lambda x: x
# naming convention
# 'compat_' + Python3_name.replace('.', '_')
# other aliases exist for convenience and/or legacy
# wrap disposable test values in type() to reclaim storage
# deal with critical unicode/str things first:
# compat_str, compat_basestring, compat_chr
# deal with critical unicode/str things first
try:
# Python 2
compat_str, compat_basestring, compat_chr = (
@@ -42,23 +39,18 @@ except NameError:
str, (str, bytes), chr
)
# compat_casefold
# casefold
try:
compat_str.casefold
compat_casefold = lambda s: s.casefold()
except AttributeError:
from .casefold import _casefold as compat_casefold
from .casefold import casefold as compat_casefold
# compat_collections_abc
try:
import collections.abc as compat_collections_abc
except ImportError:
import collections as compat_collections_abc
# compat_urllib_request
try:
import urllib.request as compat_urllib_request
except ImportError: # Python 2
@@ -87,15 +79,11 @@ except TypeError:
_add_init_method_arg(compat_urllib_request.Request)
del _add_init_method_arg
# compat_urllib_error
try:
import urllib.error as compat_urllib_error
except ImportError: # Python 2
import urllib2 as compat_urllib_error
# compat_urllib_parse
try:
import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
@@ -110,23 +98,17 @@ except ImportError: # Python 2
compat_urlparse = compat_urllib_parse
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
# compat_urllib_response
try:
import urllib.response as compat_urllib_response
except ImportError: # Python 2
import urllib as compat_urllib_response
# compat_urllib_response.addinfourl
try:
compat_urllib_response.addinfourl.status
except AttributeError:
# .getcode() is deprecated in Py 3.
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
# compat_http_cookiejar
try:
import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
@@ -145,16 +127,12 @@ else:
compat_cookiejar_Cookie = compat_cookiejar.Cookie
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
# compat_http_cookies
try:
import http.cookies as compat_cookies
except ImportError: # Python 2
import Cookie as compat_cookies
compat_http_cookies = compat_cookies
# compat_http_cookies_SimpleCookie
if sys.version_info[0] == 2 or sys.version_info < (3, 3):
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
def load(self, rawdata):
@@ -177,15 +155,11 @@ else:
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
# compat_html_entities, probably useless now
try:
import html.entities as compat_html_entities
except ImportError: # Python 2
import htmlentitydefs as compat_html_entities
# compat_html_entities_html5
try: # Python >= 3.3
compat_html_entities_html5 = compat_html_entities.html5
except AttributeError:
@@ -2434,24 +2408,18 @@ except AttributeError:
# Py < 3.1
compat_http_client.HTTPResponse.getcode = lambda self: self.status
# compat_urllib_HTTPError
try:
from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError
compat_urllib_HTTPError = compat_HTTPError
# compat_urllib_request_urlretrieve
try:
from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2
from urllib import urlretrieve as compat_urlretrieve
compat_urllib_request_urlretrieve = compat_urlretrieve
# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
try:
from HTMLParser import (
HTMLParser as compat_HTMLParser,
@@ -2464,33 +2432,22 @@ except ImportError: # Python 3
# HTMLParseError was deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling
class compat_HTMLParseError(Exception):
pass
compat_html_parser_HTMLParser = compat_HTMLParser
compat_html_parser_HTMLParseError = compat_HTMLParseError
# compat_subprocess_get_DEVNULL
try:
_DEVNULL = subprocess.DEVNULL
compat_subprocess_get_DEVNULL = lambda: _DEVNULL
except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
# compat_http_server
try:
import http.server as compat_http_server
except ImportError:
import BaseHTTPServer as compat_http_server
# compat_urllib_parse_unquote_to_bytes,
# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
# compat_urllib_parse_urlencode,
# compat_urllib_parse_parse_qs
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
@@ -2498,7 +2455,8 @@ try:
from urllib.parse import urlencode as compat_urllib_parse_urlencode
from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
_asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
else re.compile(r'([\x00-\x7f]+)'))
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
@@ -2566,21 +2524,24 @@ except ImportError: # Python 2
# Possible solutions are to either port it from python 3 with all
# the friends or manually ensure input query contains only byte strings.
# We will stick with latter thus recursively encoding the whole query.
def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
def encode_elem(e):
if isinstance(e, dict):
e = encode_dict(e)
elif isinstance(e, (list, tuple,)):
e = type(e)(encode_elem(el) for el in e)
list_e = encode_list(e)
e = tuple(list_e) if isinstance(e, tuple) else list_e
elif isinstance(e, compat_str):
e = e.encode(encoding, errors)
e = e.encode(encoding)
return e
def encode_dict(d):
return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
def encode_list(l):
return [encode_elem(e) for e in l]
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
# Python 2's version is apparently totally broken
@@ -2635,61 +2596,8 @@ except ImportError: # Python 2
('parse_qs', compat_parse_qs)):
setattr(compat_urllib_parse, name, fix)
try:
all(chr(i) in b'' for i in range(256))
except TypeError:
# not all chr(i) are str: patch Python2 quote
_safemaps = getattr(compat_urllib_parse, '_safemaps', {})
_always_safe = frozenset(compat_urllib_parse.always_safe)
def _quote(s, safe='/'):
"""quote('abc def') -> 'abc%20def'"""
if not s and s is not None: # fast path
return s
safe = frozenset(safe)
cachekey = (safe, _always_safe)
try:
safe_map = _safemaps[cachekey]
except KeyError:
safe = _always_safe | safe
safe_map = {}
for i in range(256):
c = chr(i)
safe_map[c] = (
c if (i < 128 and c in safe)
else b'%{0:02X}'.format(i))
_safemaps[cachekey] = safe_map
if safe.issuperset(s):
return s
return ''.join(safe_map[c] for c in s)
# linked code
def _quote_plus(s, safe=''):
return (
_quote(s, safe + b' ').replace(b' ', b'+') if b' ' in s
else _quote(s, safe))
# linked code
def _urlcleanup():
if compat_urllib_parse._urlopener:
compat_urllib_parse._urlopener.cleanup()
_safemaps.clear()
compat_urllib_parse.ftpcache.clear()
for name, fix in (
('quote', _quote),
('quote_plus', _quote_plus),
('urlcleanup', _urlcleanup)):
setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
setattr(compat_urllib_parse, name, fix)
compat_urllib_parse_parse_qs = compat_parse_qs
# compat_urllib_request_DataHandler
try:
from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError: # Python < 3.4
@@ -2724,20 +2632,16 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# compat_xml_etree_ElementTree_ParseError
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
# compat_xml_etree_ElementTree_Element
_etree = xml.etree.ElementTree
etree = xml.etree.ElementTree
class _TreeBuilder(_etree.TreeBuilder):
class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
@@ -2746,7 +2650,7 @@ try:
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
# the following will crash with:
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
isinstance(None, _etree.Element)
isinstance(None, etree.Element)
from xml.etree.ElementTree import Element as compat_etree_Element
except TypeError: # Python <=2.6
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
@@ -2754,12 +2658,12 @@ compat_xml_etree_ElementTree_Element = compat_etree_Element
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
# python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method)
try:
_etree_iter = _etree.Element.iter
_etree_iter = etree.Element.iter
except AttributeError: # Python <=2.6
def _etree_iter(root):
for el in root.findall('*'):
@@ -2771,29 +2675,27 @@ else:
# 2.7 source
def _XML(text, parser=None):
if not parser:
parser = _etree.XMLParser(target=_TreeBuilder())
parser = etree.XMLParser(target=_TreeBuilder())
parser.feed(text)
return parser.close()
def _element_factory(*args, **kwargs):
el = _etree.Element(*args, **kwargs)
el = etree.Element(*args, **kwargs)
for k, v in el.items():
if isinstance(v, bytes):
el.set(k, v.decode('utf-8'))
return el
def compat_etree_fromstring(text):
doc = _XML(text, parser=_etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
for el in _etree_iter(doc):
if el.text is not None and isinstance(el.text, bytes):
el.text = el.text.decode('utf-8')
return doc
# compat_xml_etree_register_namespace
try:
compat_etree_register_namespace = _etree.register_namespace
except AttributeError:
if hasattr(etree, 'register_namespace'):
compat_etree_register_namespace = etree.register_namespace
else:
def compat_etree_register_namespace(prefix, uri):
"""Register a namespace prefix.
The registry is global, and any existing mapping for either the
@@ -2802,16 +2704,14 @@ except AttributeError:
attributes in this namespace will be serialized with prefix if possible.
ValueError is raised if prefix is reserved or is invalid.
"""
if re.match(r'ns\d+$', prefix):
raise ValueError('Prefix format reserved for internal use')
for k, v in list(_etree._namespace_map.items()):
if re.match(r"ns\d+$", prefix):
raise ValueError("Prefix format reserved for internal use")
for k, v in list(etree._namespace_map.items()):
if k == uri or v == prefix:
del _etree._namespace_map[k]
_etree._namespace_map[uri] = prefix
del etree._namespace_map[k]
etree._namespace_map[uri] = prefix
compat_xml_etree_register_namespace = compat_etree_register_namespace
# compat_xpath, compat_etree_iterfind
if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . !
@@ -2998,6 +2898,7 @@ if sys.version_info < (2, 7):
def __init__(self, root):
self.root = root
##
# Generate all matching objects.
def compat_etree_iterfind(elem, path, namespaces=None):
@@ -3032,15 +2933,13 @@ if sys.version_info < (2, 7):
else:
compat_xpath = lambda xpath: xpath
compat_etree_iterfind = lambda element, match: element.iterfind(match)
compat_xpath = _IDENTITY
# compat_os_name
compat_os_name = os._name if os.name == 'java' else os.name
# compat_shlex_quote
if compat_os_name == 'nt':
def compat_shlex_quote(s):
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
@@ -3055,7 +2954,6 @@ else:
return "'" + s.replace("'", "'\"'\"'") + "'"
# compat_shlex.split
try:
args = shlex.split('中文')
assert (isinstance(args, list)
@@ -3071,7 +2969,6 @@ except (AssertionError, UnicodeEncodeError):
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
# compat_ord
def compat_ord(c):
if isinstance(c, int):
return c
@@ -3079,7 +2976,6 @@ def compat_ord(c):
return ord(c)
# compat_getenv, compat_os_path_expanduser, compat_setenv
if sys.version_info >= (3, 0):
compat_getenv = os.getenv
compat_expanduser = os.path.expanduser
@@ -3167,22 +3063,6 @@ else:
compat_os_path_expanduser = compat_expanduser
# compat_os_makedirs
try:
os.makedirs('.', exist_ok=True)
compat_os_makedirs = os.makedirs
except TypeError: # < Py3.2
from errno import EEXIST as _errno_EEXIST
def compat_os_makedirs(name, mode=0o777, exist_ok=False):
try:
return os.makedirs(name, mode=mode)
except OSError as ose:
if not (exist_ok and ose.errno == _errno_EEXIST):
raise
# compat_os_path_realpath
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
@@ -3196,7 +3076,6 @@ else:
compat_os_path_realpath = compat_realpath
# compat_print
if sys.version_info < (3, 0):
def compat_print(s):
from .utils import preferredencoding
@@ -3207,7 +3086,6 @@ else:
print(s)
# compat_getpass_getpass
if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs):
if isinstance(prompt, compat_str):
@@ -3220,42 +3098,36 @@ else:
compat_getpass_getpass = compat_getpass
# compat_input
try:
compat_input = raw_input
except NameError: # Python 3
compat_input = input
# compat_kwargs
# Python < 2.6.5 require kwargs to be bytes
try:
(lambda x: x)(**{'x': 0})
def _testfunc(x):
pass
_testfunc(**{'x': 0})
except TypeError:
def compat_kwargs(kwargs):
return dict((bytes(k), v) for k, v in kwargs.items())
else:
compat_kwargs = _IDENTITY
compat_kwargs = lambda kwargs: kwargs
# compat_numeric_types
try:
compat_numeric_types = (int, float, long, complex)
except NameError: # Python 3
compat_numeric_types = (int, float, complex)
# compat_integer_types
try:
compat_integer_types = (int, long)
except NameError: # Python 3
compat_integer_types = (int, )
# compat_int
compat_int = compat_integer_types[-1]
# compat_socket_create_connection
if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None):
host, port = address
@@ -3282,7 +3154,6 @@ else:
compat_socket_create_connection = socket.create_connection
# compat_contextlib_suppress
try:
from contextlib import suppress as compat_contextlib_suppress
except ImportError:
@@ -3325,12 +3196,12 @@ except AttributeError:
# repeated .close() is OK, but just in case
with compat_contextlib_suppress(EnvironmentError):
f.close()
popen.wait()
popen.wait()
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def _workaround_optparse_bug9161():
def workaround_optparse_bug9161():
op = optparse.OptionParser()
og = optparse.OptionGroup(op, 'foo')
try:
@@ -3349,10 +3220,9 @@ def _workaround_optparse_bug9161():
optparse.OptionGroup.add_option = _compat_add_option
# compat_shutil_get_terminal_size
try:
from shutil import get_terminal_size as compat_get_terminal_size # Python >= 3.3
except ImportError:
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
compat_get_terminal_size = shutil.get_terminal_size
else:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)):
@@ -3382,33 +3252,27 @@ except ImportError:
columns = _columns
if lines is None or lines <= 0:
lines = _lines
return _terminal_size(columns, lines)
compat_shutil_get_terminal_size = compat_get_terminal_size
# compat_itertools_count
try:
type(itertools.count(start=0, step=1))
itertools.count(start=0, step=1)
compat_itertools_count = itertools.count
except TypeError: # Python 2.6 lacks step
except TypeError: # Python 2.6
def compat_itertools_count(start=0, step=1):
while True:
yield start
start += step
# compat_tokenize_tokenize
if sys.version_info >= (3, 0):
from tokenize import tokenize as compat_tokenize_tokenize
else:
from tokenize import generate_tokens as compat_tokenize_tokenize
# compat_struct_pack, compat_struct_unpack, compat_Struct
try:
type(struct.pack('!I', 0))
struct.pack('!I', 0)
except TypeError:
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
# See https://bugs.python.org/issue19099
@@ -3440,10 +3304,8 @@ else:
compat_Struct = struct.Struct
# builtins returning an iterator
# compat_map, compat_filter
# supposedly the same versioning as for zip below
# compat_map/filter() returning an iterator, supposedly the
# same versioning as for zip below
try:
from future_builtins import map as compat_map
except ImportError:
@@ -3460,7 +3322,6 @@ except ImportError:
except ImportError:
compat_filter = filter
# compat_zip
try:
from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x
@@ -3470,7 +3331,6 @@ except ImportError: # not 2.6+ or is 3.x
compat_zip = zip
# compat_itertools_zip_longest
# method renamed between Py2/3
try:
from itertools import zip_longest as compat_itertools_zip_longest
@@ -3478,8 +3338,7 @@ except ImportError:
from itertools import izip_longest as compat_itertools_zip_longest
# compat_collections_chain_map
# collections.ChainMap: new class
# new class in collections
try:
from collections import ChainMap as compat_collections_chain_map
# Py3.3's ChainMap is deficient
@@ -3535,22 +3394,19 @@ except ImportError:
def new_child(self, m=None, **kwargs):
m = m or {}
m.update(kwargs)
# support inheritance !
return type(self)(m, *self.maps)
return compat_collections_chain_map(m, *self.maps)
@property
def parents(self):
return type(self)(*(self.maps[1:]))
return compat_collections_chain_map(*(self.maps[1:]))
# compat_re_Pattern, compat_re_Match
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
compat_re_Pattern = type(re.compile(''))
# and on the type of a match
compat_re_Match = type(re.match('a', 'a'))
# compat_base64_b64decode
if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str):
@@ -3562,7 +3418,6 @@ else:
compat_base64_b64decode = compat_b64decode
# compat_ctypes_WINFUNCTYPE
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
@@ -3581,7 +3436,6 @@ else:
return ctypes.WINFUNCTYPE(*args, **kwargs)
# compat_open
if sys.version_info < (3, 0):
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
def compat_open(file_, *args, **kwargs):
@@ -3609,28 +3463,18 @@ except AttributeError:
def compat_datetime_timedelta_total_seconds(td):
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
# optional decompression packages
# compat_brotli
# PyPi brotli package implements 'br' Content-Encoding
try:
import brotli as compat_brotli
except ImportError:
compat_brotli = None
# compat_ncompress
# PyPi ncompress package implements 'compress' Content-Encoding
try:
import ncompress as compat_ncompress
except ImportError:
compat_ncompress = None
# compat_zstandard
# PyPi zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
try:
import zstandard as compat_zstandard
except ImportError:
compat_zstandard = None
legacy = [
'compat_HTMLParseError',
@@ -3647,7 +3491,6 @@ legacy = [
'compat_getpass',
'compat_parse_qs',
'compat_realpath',
'compat_shlex_split',
'compat_urllib_parse_parse_qs',
'compat_urllib_parse_unquote',
'compat_urllib_parse_unquote_plus',
@@ -3661,6 +3504,8 @@ legacy = [
__all__ = [
'compat_html_parser_HTMLParseError',
'compat_html_parser_HTMLParser',
'compat_Struct',
'compat_base64_b64decode',
'compat_basestring',
@@ -3669,9 +3514,13 @@ __all__ = [
'compat_chr',
'compat_collections_abc',
'compat_collections_chain_map',
'compat_datetime_timedelta_total_seconds',
'compat_http_cookiejar',
'compat_http_cookiejar_Cookie',
'compat_http_cookies',
'compat_http_cookies_SimpleCookie',
'compat_contextlib_suppress',
'compat_ctypes_WINFUNCTYPE',
'compat_datetime_timedelta_total_seconds',
'compat_etree_fromstring',
'compat_etree_iterfind',
'compat_filter',
@@ -3680,16 +3529,9 @@ __all__ = [
'compat_getpass_getpass',
'compat_html_entities',
'compat_html_entities_html5',
'compat_html_parser_HTMLParseError',
'compat_html_parser_HTMLParser',
'compat_http_cookiejar',
'compat_http_cookiejar_Cookie',
'compat_http_cookies',
'compat_http_cookies_SimpleCookie',
'compat_http_client',
'compat_http_server',
'compat_input',
'compat_int',
'compat_integer_types',
'compat_itertools_count',
'compat_itertools_zip_longest',
@@ -3699,7 +3541,6 @@ __all__ = [
'compat_numeric_types',
'compat_open',
'compat_ord',
'compat_os_makedirs',
'compat_os_name',
'compat_os_path_expanduser',
'compat_os_path_realpath',
@@ -3709,7 +3550,7 @@ __all__ = [
'compat_register_utf8',
'compat_setenv',
'compat_shlex_quote',
'compat_shutil_get_terminal_size',
'compat_shlex_split',
'compat_socket_create_connection',
'compat_str',
'compat_struct_pack',
@@ -3729,5 +3570,5 @@ __all__ = [
'compat_xml_etree_register_namespace',
'compat_xpath',
'compat_zip',
'compat_zstandard',
'workaround_optparse_bug9161',
]

View File

@@ -11,7 +11,6 @@ from ..utils import (
decodeArgument,
encodeFilename,
error_to_compat_str,
float_or_none,
format_bytes,
shell_quote,
timeconvert,
@@ -368,27 +367,14 @@ class FileDownloader(object):
})
return True
min_sleep_interval, max_sleep_interval = (
float_or_none(self.params.get(interval), default=0)
for interval in ('sleep_interval', 'max_sleep_interval'))
sleep_note = ''
available_at = info_dict.get('available_at')
if available_at:
forced_sleep_interval = available_at - int(time.time())
if forced_sleep_interval > min_sleep_interval:
sleep_note = 'as required by the site'
min_sleep_interval = forced_sleep_interval
if forced_sleep_interval > max_sleep_interval:
max_sleep_interval = forced_sleep_interval
sleep_interval = random.uniform(
min_sleep_interval, max_sleep_interval or min_sleep_interval)
if sleep_interval > 0:
min_sleep_interval = self.params.get('sleep_interval')
if min_sleep_interval:
max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
self.to_screen(
'[download] Sleeping %.2f seconds %s...' % (
sleep_interval, sleep_note))
'[download] Sleeping %s seconds...' % (
int(sleep_interval) if sleep_interval.is_integer()
else '%.2f' % sleep_interval))
time.sleep(sleep_interval)
return self.real_download(filename, info_dict)

View File

@@ -32,7 +32,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE):
IE_DESC = 'CC视频'
_IE_DESC = 'CC视频'
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{

View File

@@ -9,7 +9,7 @@ from ..utils import (
class CloudyIE(InfoExtractor):
IE_DESC = 'cloudy.ec'
_IE_DESC = 'cloudy.ec'
_VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)'
_TESTS = [{
'url': 'https://www.cloudy.ec/v/af511e2527aac',

View File

@@ -422,8 +422,6 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
# supply this in public subclasses: used in supported sites list, etc
# IE_DESC = 'short description of IE'
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
@@ -505,7 +503,7 @@ class InfoExtractor(object):
if not self._x_forwarded_for_ip:
# Geo bypass mechanism is explicitly disabled by user
if not self.get_param('geo_bypass', True):
if not self._downloader.params.get('geo_bypass', True):
return
if not geo_bypass_context:
@@ -527,7 +525,7 @@ class InfoExtractor(object):
# Explicit IP block specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
ip_block = self.get_param('geo_bypass_ip_block', None)
ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
# Otherwise use random IP block from geo bypass context but only
# if extractor is known as geo bypassable
@@ -538,8 +536,8 @@ class InfoExtractor(object):
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
if self.get_param('verbose', False):
self.to_screen(
if self._downloader.params.get('verbose', False):
self._downloader.to_screen(
'[debug] Using fake IP %s as X-Forwarded-For.'
% self._x_forwarded_for_ip)
return
@@ -548,7 +546,7 @@ class InfoExtractor(object):
# Explicit country code specified by user, use it right away
# regardless of whether extractor is geo bypassable or not
country = self.get_param('geo_bypass_country', None)
country = self._downloader.params.get('geo_bypass_country', None)
# Otherwise use random country code from geo bypass context but
# only if extractor is known as geo bypassable
@@ -559,8 +557,8 @@ class InfoExtractor(object):
if country:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
if self.get_param('verbose', False):
self.to_screen(
if self._downloader.params.get('verbose', False):
self._downloader.to_screen(
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
% (self._x_forwarded_for_ip, country.upper()))
@@ -586,9 +584,9 @@ class InfoExtractor(object):
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
if (not self.get_param('geo_bypass_country', None)
if (not self._downloader.params.get('geo_bypass_country', None)
and self._GEO_BYPASS
and self.get_param('geo_bypass', True)
and self._downloader.params.get('geo_bypass', True)
and not self._x_forwarded_for_ip
and countries):
country_code = random.choice(countries)
@@ -698,7 +696,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
else:
self.report_warning(errmsg)
self._downloader.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
@@ -770,11 +768,11 @@ class InfoExtractor(object):
webpage_bytes = prefix + webpage_bytes
if not encoding:
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self.get_param('dump_intermediate_pages', False):
if self._downloader.params.get('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self.to_screen(dump)
if self.get_param('write_pages', False):
self._downloader.to_screen(dump)
if self._downloader.params.get('write_pages', False):
basen = '%s_%s' % (video_id, urlh.geturl())
if len(basen) > 240:
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -976,9 +974,19 @@ class InfoExtractor(object):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(self.__ie_msg(msg))
# Emit a debug message prefixed with this extractor's name (via __ie_msg()).
# NOTE(review): two variants of this method appear back to back here (the
# surrounding text is a rendered diff without +/- markers): a one-statement
# variant that delegates to self._downloader.write_debug(), and a longer one
# that does the filtering and output itself - confirm which one is current.
def write_debug(self, msg, only_once=False):
def write_debug(self, msg, only_once=False, _cache=[]):
'''Log debug message or Print message to stderr'''
# delegating variant: the downloader receives the tagged message and only_once
self._downloader.write_debug(self.__ie_msg(msg), only_once=only_once)
# self-contained variant: nothing is emitted unless the 'verbose' param is set
if not self.get_param('verbose', False):
return
message = '[debug] ' + self.__ie_msg(msg)
# a 'logger' param, if set, receives the message; only_once is not
# consulted on that path
logger = self.get_param('logger')
if logger:
logger.debug(message)
else:
# _cache is a mutable default argument, so the hashes stored in it persist
# from one call to the next; that is what lets only_once detect a repeat
if only_once and hash(message) in _cache:
return
self._downloader.to_stderr(message)
# remember the message so that a later only_once call can be suppressed
_cache.append(hash(message))
# name, default=None, *args, **kwargs
def get_param(self, name, *args, **kwargs):
@@ -1074,7 +1082,7 @@ class InfoExtractor(object):
if mobj:
break
if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name
else:
_name = name
@@ -1092,7 +1100,7 @@ class InfoExtractor(object):
elif fatal:
raise RegexNotFoundError('Unable to extract %s' % _name)
else:
self.report_warning('unable to extract %s' % _name + bug_reports_message())
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
@@ -1162,7 +1170,7 @@ class InfoExtractor(object):
username = None
password = None
if self.get_param('usenetrc', False):
if self._downloader.params.get('usenetrc', False):
try:
netrc_machine = netrc_machine or self._NETRC_MACHINE
info = netrc.netrc().authenticators(netrc_machine)
@@ -1173,7 +1181,7 @@ class InfoExtractor(object):
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
except (AttributeError, IOError, netrc.NetrcParseError) as err:
self.report_warning(
self._downloader.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
return username, password
@@ -1210,10 +1218,10 @@ class InfoExtractor(object):
"""
if self._downloader is None:
return None
downloader_params = self._downloader.params
twofactor = self.get_param('twofactor')
if twofactor is not None:
return twofactor
if downloader_params.get('twofactor') is not None:
return downloader_params['twofactor']
return compat_getpass('Type %s and press [Return]: ' % note)
@@ -1348,7 +1356,7 @@ class InfoExtractor(object):
elif fatal:
raise RegexNotFoundError('Unable to extract JSON-LD')
else:
self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
@@ -1579,7 +1587,7 @@ class InfoExtractor(object):
if f.get('vcodec') == 'none': # audio only
preference -= 50
if self.get_param('prefer_free_formats'):
if self._downloader.params.get('prefer_free_formats'):
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
else:
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
@@ -1591,7 +1599,7 @@ class InfoExtractor(object):
else:
if f.get('acodec') == 'none': # video only
preference -= 40
if self.get_param('prefer_free_formats'):
if self._downloader.params.get('prefer_free_formats'):
ORDER = ['flv', 'mp4', 'webm']
else:
ORDER = ['webm', 'flv', 'mp4']
@@ -1657,7 +1665,7 @@ class InfoExtractor(object):
""" Either "http:" or "https:", depending on the user's preferences """
return (
'http:'
if self.get_param('prefer_insecure', False)
if self._downloader.params.get('prefer_insecure', False)
else 'https:')
def _proto_relative_url(self, url, scheme=None):
@@ -3162,7 +3170,7 @@ class InfoExtractor(object):
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf
rtmp_url_parts = re.split(
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
r'((?:mp4|mp3|flv):)', source_url, 1)
if len(rtmp_url_parts) == 3:
rtmp_url, prefix, play_path = rtmp_url_parts
a_format.update({
@@ -3189,7 +3197,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(msg)
else:
self.report_warning(msg)
self._downloader.report_warning(msg)
return res
def _float(self, v, name, fatal=False, **kwargs):
@@ -3199,7 +3207,7 @@ class InfoExtractor(object):
if fatal:
raise ExtractorError(msg)
else:
self.report_warning(msg)
self._downloader.report_warning(msg)
return res
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
@@ -3208,12 +3216,12 @@ class InfoExtractor(object):
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
self.cookiejar.set_cookie(cookie)
self._downloader.cookiejar.set_cookie(cookie)
# Builds a request for `url`, lets the cookie jar add its Cookie header to
# that request, and parses the resulting header value into a
# compat_cookies_SimpleCookie.
# NOTE(review): the jar is reached in two ways below (self.cookiejar and
# self._downloader.cookiejar); the surrounding text is a rendered diff without
# +/- markers, so these are presumably the two sides of one change - confirm
# which is current.
def _get_cookies(self, url):
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
req = sanitized_Request(url)
self.cookiejar.add_cookie_header(req)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie):
@@ -3273,8 +3281,8 @@ class InfoExtractor(object):
return not any_restricted
# Return the result of _get_subtitles(*args, **kwargs) when the
# 'writesubtitles' or 'listsubtitles' param is set; otherwise return {}
# without calling it.
# NOTE(review): the condition is shown twice, once reading the params through
# self.get_param() and once through self._downloader.params.get(); the
# surrounding text is a rendered diff without +/- markers, so these are
# presumably the two sides of one change - confirm which is current.
def extract_subtitles(self, *args, **kwargs):
if (self.get_param('writesubtitles', False)
or self.get_param('listsubtitles')):
if (self._downloader.params.get('writesubtitles', False)
or self._downloader.params.get('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
@@ -3295,11 +3303,7 @@ class InfoExtractor(object):
""" Merge subtitle dictionaries, language by language. """
# ..., * , target=None
target = kwargs.get('target')
if target is None:
target = dict(subtitle_dict1)
else:
subtitle_dicts = (subtitle_dict1,) + subtitle_dicts
target = kwargs.get('target') or dict(subtitle_dict1)
for subtitle_dict in subtitle_dicts:
for lang in subtitle_dict:
@@ -3307,8 +3311,8 @@ class InfoExtractor(object):
return target
# Return the result of _get_automatic_captions(*args, **kwargs) when the
# 'writeautomaticsub' or 'listsubtitles' param is set; otherwise return {}
# without calling it.
# NOTE(review): the condition is shown twice, once reading the params through
# self.get_param() and once through self._downloader.params.get(); the
# surrounding text is a rendered diff without +/- markers, so these are
# presumably the two sides of one change - confirm which is current.
def extract_automatic_captions(self, *args, **kwargs):
if (self.get_param('writeautomaticsub', False)
or self.get_param('listsubtitles')):
if (self._downloader.params.get('writeautomaticsub', False)
or self._downloader.params.get('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
@@ -3316,9 +3320,9 @@ class InfoExtractor(object):
raise NotImplementedError('This method must be implemented by subclasses')
# Call _mark_watched(*args, **kwargs) only when the 'mark_watched' param is
# set AND there is some sign of an account: either the first item returned by
# _get_login_info() is not None or the 'cookiefile' param is not None.
# Nothing is returned.
# NOTE(review): each part of the condition is shown twice, once reading the
# params through self.get_param() and once through
# self._downloader.params.get(); the surrounding text is a rendered diff
# without +/- markers, so these are presumably the two sides of one change -
# confirm which is current.
def mark_watched(self, *args, **kwargs):
if (self.get_param('mark_watched', False)
if (self._downloader.params.get('mark_watched', False)
and (self._get_login_info()[0] is not None
or self.get_param('cookiefile') is not None)):
or self._downloader.params.get('cookiefile') is not None)):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
@@ -3326,7 +3330,7 @@ class InfoExtractor(object):
# Return a dict of extra request headers: it holds a 'Ytdl-request-proxy'
# entry carrying the 'geo_verification_proxy' param when that param is set,
# and is empty otherwise.
# NOTE(review): the param lookup is shown twice, through self.get_param() and
# through self._downloader.params.get(); the surrounding text is a rendered
# diff without +/- markers, so these are presumably the two sides of one
# change - confirm which is current.
def geo_verification_headers(self):
headers = {}
geo_verification_proxy = self.get_param('geo_verification_proxy')
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
if geo_verification_proxy:
headers['Ytdl-request-proxy'] = geo_verification_proxy
return headers

View File

@@ -35,6 +35,15 @@ from ..utils import (
class ITVBaseIE(InfoExtractor):
def _search_nextjs_data(self, webpage, video_id, **kw):
    """Find the page's __NEXT_DATA__ script element and parse its JSON text.

    Keyword arguments:
    transform_source -- passed on to _parse_json() (default None)
    fatal -- passed on to both _search_regex() and _parse_json()
             (default True)
    any others (eg, default) -- passed on to _search_regex()

    Returns the parsed JSON. If the search yields no text to parse, ie None
    (non-fatal miss) or a default that is already a JSON-type value such as
    {}, that value is returned as it is. A text default such as '{}' is
    still parsed, as before.
    """
    transform_source = kw.pop('transform_source', None)
    fatal = kw.pop('fatal', True)
    next_data = self._search_regex(
        r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
        webpage, 'next.js data', group='js', fatal=fatal, **kw)
    # json.loads() raises TypeError, not ValueError, for non-text input, so
    # a miss must not reach the parser
    if next_data is None or isinstance(next_data, (dict, list, bool, int, float)):
        return next_data
    return self._parse_json(
        next_data, video_id, transform_source=transform_source, fatal=fatal)
def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
if errnote is False:
return False
@@ -100,9 +109,7 @@ class ITVBaseIE(InfoExtractor):
class ITVIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
IE_DESC = 'ITVX'
_WORKING = False
_IE_DESC = 'ITVX'
_TESTS = [{
'note': 'Hub URLs redirect to ITVX',
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
@@ -263,7 +270,7 @@ class ITVIE(ITVBaseIE):
'ext': determine_ext(href, 'vtt'),
})
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default={})
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
info = self._og_extract(webpage, require_title=not title)
@@ -316,7 +323,7 @@ class ITVIE(ITVBaseIE):
class ITVBTCCIE(ITVBaseIE):
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
IE_DESC = 'ITV articles: News, British Touring Car Championship'
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
_TESTS = [{
'note': 'British Touring Car Championship',
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',

View File

@@ -7,30 +7,35 @@ import operator
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_request,
)
from .openload import PhantomJSwrapper
from ..utils import (
clean_html,
determine_ext,
extract_attributes,
ExtractorError,
get_element_by_class,
get_element_by_id,
int_or_none,
merge_dicts,
NO_DEFAULT,
orderedSet,
parse_count,
remove_quotes,
str_to_int,
remove_start,
T,
traverse_obj,
update_url_query,
urlencode_postdata,
url_or_none,
urlencode_postdata,
urljoin,
)
class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub'
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
@@ -47,10 +52,7 @@ class PornHubBaseIE(InfoExtractor):
r'<body\b[^>]+\bonload=["\']go\(\)',
r'document\.cookie\s*=\s*["\']RNKEY=',
r'document\.location\.reload\(true\)')):
url_or_request = args[0]
url = (url_or_request.get_full_url()
if isinstance(url_or_request, compat_urllib_request.Request)
else url_or_request)
url = urlh.geturl()
phantom = PhantomJSwrapper(self, required_version='2.0')
phantom.get(url, html=webpage)
webpage, urlh = dl(*args, **kwargs)
@@ -60,11 +62,17 @@ class PornHubBaseIE(InfoExtractor):
# Start with the login flag cleared; _login() returns early once the flag has
# been set.
def _real_initialize(self):
self._logged_in = False
def _set_age_cookies(self, host):
    """Set each of the four age/access cookies to '1' for `host`."""
    for cookie_name in (
            'age_verified',
            'accessAgeDisclaimerPH',
            'accessAgeDisclaimerUK',
            'accessPH'):
        self._set_cookie(host, cookie_name, '1')
def _login(self, host):
if self._logged_in:
return
site = host.split('.')[0]
site = host.split('.', 1)[0]
# Both sites pornhub and pornhubpremium have separate accounts
# so there should be an option to provide credentials for both.
@@ -81,9 +89,9 @@ class PornHubBaseIE(InfoExtractor):
login_url, None, 'Downloading %s login page' % site)
def is_logged(webpage):
return any(re.search(p, webpage) for p in (
r'class=["\']signOut',
r'>Sign\s+[Oo]ut\s*<'))
return bool(
get_element_by_id('profileMenuDropdown', webpage)
or get_element_by_class('ph-icon-logout', webpage))
if is_logged(login_page):
self._logged_in = True
@@ -92,12 +100,12 @@ class PornHubBaseIE(InfoExtractor):
login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'email': username,
'password': password,
})
response = self._download_json(
'https://www.%s/front/authenticate' % host, None,
'https://www.%s/front/authenticate' % host, 'login',
'Logging in to %s' % site,
data=urlencode_postdata(login_form),
headers={
@@ -119,17 +127,12 @@ class PornHubBaseIE(InfoExtractor):
class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x)
https?://
(?:
(?:[^/]+\.)?
%s
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
''' % PornHubBaseIE._PORNHUB_HOST_RE
IE_DESC = 'PornHub' # Thumbzilla -> Redtube.com, Modelhub -> uviu.com
_PORNHUB_PATH_RE = r'/(?:(?:view_video\.php%s)\?(?:.+&)?viewkey=%s)(?P<id>[\da-z]+)'
_VALID_URL = r'https?://(?:[^/]+\.)?%s%s' % (
PornHubBaseIE._PORNHUB_HOST_RE, _PORNHUB_PATH_RE % ('|video/show', '|embed/'))
_PORNHUB_PATH_RE = _PORNHUB_PATH_RE % ('', '')
_EMBED_REGEX = [r'<iframe\s[^>]*?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@@ -138,6 +141,7 @@ class PornHubIE(PornHubBaseIE):
'ext': 'mp4',
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
'uploader': 'Babes',
'uploader_id': '/users/babes-com',
'upload_date': '20130628',
'timestamp': 1372447216,
'duration': 361,
@@ -148,6 +152,10 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18,
'tags': list,
'categories': list,
'cast': list,
},
'params': {
'format': '[format_id!^=hls]',
},
}, {
# non-ASCII title
@@ -189,14 +197,27 @@ class PornHubIE(PornHubBaseIE):
'categories': list,
'subtitles': {
'en': [{
"ext": 'srt'
}]
'ext': 'srt',
}],
},
},
'params': {
'skip_download': True,
},
'skip': 'This video has been disabled',
}, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
'info_dict': {
'id': 'ph601dc30bae19a',
'ext': 'mp4',
'timestamp': 1612564932,
'age_limit': 18,
'uploader': 'Projekt Melody',
'uploader_id': 'projekt-melody',
'upload_date': '20210205',
'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
'thumbnail': r're:https?://.+',
},
}, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
@@ -216,9 +237,6 @@ class PornHubIE(PornHubBaseIE):
# private video
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
'only_matching': True,
}, {
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
'only_matching': True,
}, {
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
'only_matching': True,
@@ -244,28 +262,36 @@ class PornHubIE(PornHubBaseIE):
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
    """Return the list of embed URLs found in <iframe src=...> attributes of `webpage`."""
    embed_iframes = re.finditer(
        r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
        webpage)
    return [iframe.group('url') for iframe in embed_iframes]
@classmethod
def _extract_urls(cls, webpage):
    """Return the list of embed URLs in `webpage`, one _EMBED_REGEX pattern after another."""
    return [
        embed.group('url')
        for embed_re in cls._EMBED_REGEX
        for embed in re.finditer(embed_re, webpage)]
# Search `webpage` for `pattern` (non-fatally, described as '<name> count' in
# any message) and convert whatever was found to a number.
# NOTE(review): the conversion is shown twice, with str_to_int() and with
# parse_count(); the surrounding text is a rendered diff without +/- markers,
# so these are presumably the two sides of one change - confirm which is
# current.
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(
return parse_count(self._search_regex(
pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
for _ in range(2):
mobj = self._match_valid_url(url)
video_id = mobj.group('id') if mobj else self._generic_id(url)
_, urlh = self._download_webpage_handle(url, video_id)
if url == urlh.geturl():
break
url = urlh.geturl()
host = mobj.group('host') or 'pornhub.com'
video_id = mobj.group('id')
self._login(host)
self._set_cookie(host, 'age_verified', '1')
self._set_age_cookies(host)
def dl_webpage(platform):
self._set_cookie(host, 'platform', platform)
@@ -276,7 +302,7 @@ class PornHubIE(PornHubBaseIE):
webpage = dl_webpage('pc')
error_msg = self._html_search_regex(
(r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
(r'(?s)<div[^>]+class=("|\')(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
webpage, 'error message', default=None, group='error')
if error_msg:
@@ -285,9 +311,9 @@ class PornHubIE(PornHubBaseIE):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
if any(re.search(p, webpage) for p in (
r'class=["\']geoBlocked["\']',
r'>\s*This content is unavailable in your country')):
if bool(get_element_by_class('geoBlocked', webpage)
or self._search_regex(
r'>\s*This content is (unavailable) in your country', webpage, 'geo-restriction', default=False)):
self.raise_geo_restricted()
# video_title from flashvars contains whitespace instead of non-ASCII (see
@@ -304,36 +330,34 @@ class PornHubIE(PornHubBaseIE):
video_urls_set = set()
subtitles = {}
flashvars = self._parse_json(
self._search_regex(
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
video_id)
if flashvars:
subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
if subtitle_url:
subtitles.setdefault('en', []).append({
'url': subtitle_url,
'ext': 'srt',
})
thumbnail = flashvars.get('image_url')
duration = int_or_none(flashvars.get('video_duration'))
media_definitions = flashvars.get('mediaDefinitions')
if isinstance(media_definitions, list):
for definition in media_definitions:
if not isinstance(definition, dict):
continue
video_url = definition.get('videoUrl')
if not video_url or not isinstance(video_url, compat_str):
continue
if video_url in video_urls_set:
continue
video_urls_set.add(video_url)
video_urls.append(
(video_url, int_or_none(definition.get('quality'))))
else:
thumbnail, duration = [None] * 2
def add_video_url(video_url, quality=None):
v_url = url_or_none(video_url)
if not v_url:
return
if v_url in video_urls_set:
return
video_urls.append((v_url, quality))
video_urls_set.add(v_url)
def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
flashvars = self._search_json(r'var\s+flashvars_\d+\s*=', webpage, 'flashvars', video_id)
flashvars = traverse_obj(flashvars, {
'closedCaptionsFile': ('closedCaptionsFile', T(url_or_none)),
'image_url': ('image_url', T(url_or_none)),
'video_duration': ('video_duration', T(int_or_none)),
'mediaDefinitions': ('mediaDefinitions', lambda _, v: v['videoUrl']),
}) or {}
subtitle_url = flashvars.get('closedCaptionsFile')
if subtitle_url:
subtitles.setdefault('en', []).append({
'url': subtitle_url,
'ext': 'srt',
})
thumbnail = flashvars.get('image_url')
duration = flashvars.get('video_duration')
for definition in flashvars.get('mediaDefinitions') or []:
add_video_url(definition['videoUrl'], int_or_none(definition.get('quality')))
def extract_js_vars(webpage, pattern, default=None):
assignments = self._search_regex(
pattern, webpage, 'encoded url', default=default)
if not assignments:
@@ -363,51 +387,33 @@ class PornHubIE(PornHubBaseIE):
js_vars[vname] = parse_js_value(value)
return js_vars
def add_video_url(video_url):
v_url = url_or_none(video_url)
if not v_url:
return
if v_url in video_urls_set:
return
video_urls.append((v_url, None))
video_urls_set.add(v_url)
def parse_quality_items(quality_items):
q_items = self._parse_json(quality_items, video_id, fatal=False)
if not isinstance(q_items, list):
return
for item in q_items:
if isinstance(item, dict):
add_video_url(item.get('url'))
for v_url in traverse_obj(q_items, (Ellipsis, 'url')):
add_video_url(v_url)
if not video_urls:
FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
js_vars = extract_js_vars(
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
default=None)
if js_vars:
for key, format_url in js_vars.items():
if key.startswith(FORMAT_PREFIXES[-1]):
parse_quality_items(format_url)
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
add_video_url(format_url)
if not video_urls and re.search(
r'<[^>]+\bid=["\']lockedPlayer', webpage):
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES))
for key, format_url in js_vars.items():
if key.startswith(FORMAT_PREFIXES[-1]):
parse_quality_items(format_url)
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
add_video_url(format_url)
if not video_urls and get_element_by_id('lockedPlayer', webpage):
raise ExtractorError(
'Video %s is locked' % video_id, expected=True)
if not video_urls:
js_vars = extract_js_vars(
dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
add_video_url(js_vars['mediastring'])
add_video_url(traverse_obj(js_vars, 'mediastring'))
for mobj in re.finditer(
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage):
video_url = mobj.group('url')
if video_url not in video_urls_set:
video_urls.append((video_url, None))
video_urls_set.add(video_url)
add_video_url(mobj.group('url'))
upload_date = None
formats = []
@@ -433,6 +439,13 @@ class PornHubIE(PornHubBaseIE):
'height': height,
})
if not video_urls:
# import here to avoid mutually recursive dependency
from .generic import GenericIE
ret = GenericIE.generic_url_result(url, video_id=video_id, video_title=title, force_videoid=True)
ret['_type'] = 'url_transparent'
return ret
for video_url, height in video_urls:
if not upload_date:
upload_date = self._search_regex(
@@ -440,52 +453,55 @@ class PornHubIE(PornHubBaseIE):
if upload_date:
upload_date = upload_date.replace('/', '')
if '/video/get_media' in video_url:
# self._set_cookie(host, 'platform', 'tv')
medias = self._download_json(video_url, video_id, fatal=False)
if isinstance(medias, list):
for media in medias:
if not isinstance(media, dict):
continue
video_url = url_or_none(media.get('videoUrl'))
if not video_url:
continue
height = int_or_none(media.get('quality'))
add_format(video_url, height)
for media in traverse_obj(medias, lambda _, v: v['videoUrl']):
video_url = url_or_none(media['videoUrl'])
if not video_url:
continue
height = int_or_none(media.get('quality'))
add_format(video_url, height)
continue
add_format(video_url)
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
model_profile = self._search_json(
r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
webpage, 'uploader', default=None)
webpage, 'uploader', default=None) or model_profile.get('username')
def extract_vote_count(kind, name):
return self._extract_count(
(r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
(r'<span[^>]+\bclass="votes%s"[^>]*>(\d[\d,\.]*[kKmM]?)</span>' % kind,
r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
webpage, name)
view_count = self._extract_count(
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
r'<span class="count">(\d[\d,\.]*[kKmM]?)</span> [Vv]iews', webpage, 'view')
like_count = extract_vote_count('Up', 'like')
dislike_count = extract_vote_count('Down', 'dislike')
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
r'All Comments\s*<span>\((\d[\d,\.]*[kKmM]?)\)', webpage, 'comment')
def extract_list(meta_key):
div = self._search_regex(
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
% meta_key, webpage, meta_key, default=None)
if div:
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
return [clean_html(x) for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
info = self._search_json_ld(webpage, video_id, default={})
# description provided in JSON-LD is irrelevant
info['description'] = None
for k in ('url', 'description'):
info.pop(k, None)
return merge_dicts({
return merge_dicts(info, {
'id': video_id,
'uploader': video_uploader,
'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
@@ -498,8 +514,9 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18,
'tags': extract_list('tags'),
'categories': extract_list('categories'),
'cast': extract_list('pornstars'),
'subtitles': subtitles,
}, info)
})
class PornHubPlaylistBaseIE(PornHubBaseIE):
@@ -512,65 +529,28 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
# drop-down menu that uses similar pattern for videos (see
# https://github.com/ytdl-org/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
r'(?s)(<div\s[^>]*class=["\']container.+)', webpage,
'container', default=webpage)
def entries():
seen_ids = set()
for m in re.finditer(r'<\w+\s[^>]*(?<!-)\bhref\s*=\s*.("|\'|\b)%s\1[^>]*>' % (PornHubIE._PORNHUB_PATH_RE,), container):
video_id = m.group('id')
if video_id:
if video_id in seen_ids:
continue
seen_ids.add(video_id)
elt = extract_attributes(m.group(0))
video_url = urljoin(host, elt.get('href'))
yield video_url, video_id, elt.get('title')
return [
self.url_result(
'http://www.%s/%s' % (host, video_url),
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
container))
video_url, PornHubIE.ie_key(), video_title=title, video_id=video_id)
for video_url, video_id, title in entries()
]
class PornHubUserIE(PornHubPlaylistBaseIE):
    # Matches a user, channel, model or pornstar page, but not its /videos
    # sub-page; `url` captures the page address, `id` the last path component
    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph',
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious',
        'info_dict': {
            'id': 'liz-vicious',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/users/russianveet69',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/channels/povd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
    }, {
        # Unavailable via /videos page, but available with direct pagination
        # on pornstar page (see [1]), requires premium
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
        'only_matching': True,
    }, {
        # Same as before, multi page
        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
        'only_matching': True,
    }, {
        'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the page's /videos listing over to PornHubPagedVideoListIE.

        A page number found in `url` by _extract_page() is carried over to
        the listing URL as its `page` query parameter.
        """
        match = re.match(self._VALID_URL, url)
        listing_url = '%s/videos' % match.group('url')
        page_num = self._extract_page(url)
        if page_num:
            listing_url = update_url_query(listing_url, {'page': page_num})
        return self.url_result(
            listing_url, ie=PornHubPagedVideoListIE.ie_key(),
            video_id=match.group('id'))
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
@staticmethod
def _has_more(webpage):
@@ -617,23 +597,77 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
for e in page_entries:
yield e
for from_ in page_entries:
yield from_
if not self._has_more(webpage):
break
# Match the URL, prepare the session for the matched host (login, then the
# age cookies) and return a playlist, identified by the URL's `id` group,
# whose entries come from _entries().
# NOTE(review): the URL match is shown twice, with re.match() on _VALID_URL
# and with self._match_valid_url(); the surrounding text is a rendered diff
# without +/- markers, so these are presumably the two sides of one change -
# confirm which is current.
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
mobj = self._match_valid_url(url)
# NOTE(review): in _PORNHUB_HOST_RE the `host` group sits in only one of two
# alternatives, so it is None when the .onion alternative matched, while
# _login() calls host.split() - confirm how .onion URLs are meant to be handled
host = mobj.group('host')
item_id = mobj.group('id')
self._login(host)
self._set_age_cookies(host)
return self.playlist_result(self._entries(url, host, item_id), item_id)
class PornHubUserIE(PornHubPagedPlaylistBaseIE):
    # Matches a user, channel, model or pornstar page, but not its /videos
    # sub-page; `url` captures the page address and `id` keeps the section
    # prefix, eg 'model/zoe_ph'
    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?P<id>(?:(?:user|channel)s|model|pornstar)/[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/model/zoe_ph',
        'info_dict': {
            'id': 'zoe_ph',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/pornstar/liz-vicious',
        'info_dict': {
            'id': 'liz-vicious',
        },
        'playlist_mincount': 118,
    }, {
        'url': 'https://www.pornhub.com/users/russianveet69',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/channels/povd',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
        'only_matching': True,
    }, {
        # Unavailable via /videos page, but available with direct pagination
        # on pornstar page (see [1]), requires premium
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27853
        'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
        'only_matching': True,
    }, {
        # Same as before, multi page
        'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
        'only_matching': True,
    }, {
        'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the playlist of the videos listed on the page's /videos listing.

        A page number found in `url` by _extract_page() is carried over to
        the listing URL as its `page` query parameter. The playlist is
        identified by the last path component of the `id` group (eg 'zoe_ph'
        for 'model/zoe_ph'), while _entries() is given the full group.
        """
        mobj = self._match_valid_url(url)
        # NOTE(review): `host` is None when the .onion alternative of
        # _PORNHUB_HOST_RE matched, while _login() calls host.split() -
        # confirm how .onion URLs are meant to be handled
        user_id, host = mobj.group('id', 'host')
        videos_url = '%s/videos' % mobj.group('url')
        page = self._extract_page(url)
        if page:
            videos_url = update_url_query(videos_url, {'page': page})

        self._login(host)
        # same session preparation as PornHubPagedPlaylistBaseIE._real_extract(),
        # which this method overrides
        self._set_age_cookies(host)

        return self.playlist_result(
            self._entries(videos_url, host, user_id), user_id.split('/')[-1])
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/|gif/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True,
@@ -642,16 +676,20 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
'only_matching': True,
}, {
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
'info_dict': {
'id': 'pornstar/jenny-blighe/videos',
},
'playlist_mincount': 149,
'only_matching': True,
}, {
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
'url': 'https://www.pornhub.com/pornstar/kylie-quinn/videos',
'info_dict': {
'id': 'pornstar/jenny-blighe/videos',
'id': 'pornstar/kylie-quinn/videos',
},
'playlist_mincount': 40,
'playlist_mincount': 80,
}, {
'url': 'https://www.pornhub.com/pornstar/kylie-quinn/videos?page=2',
'info_dict': {
'id': 'pornstar/kylie-quinn/videos',
},
# specific page: process just that page
'playlist_count': 40,
}, {
# default sorting as Top Rated Videos
'url': 'https://www.pornhub.com/channels/povd/videos',
@@ -727,27 +765,14 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
'only_matching': True,
}, {
'url': 'https://www.pornhub.com/playlist/44121572',
'info_dict': {
'id': 'playlist/44121572',
},
'playlist_mincount': 132,
}, {
'url': 'https://www.pornhub.com/playlist/4667351',
'only_matching': True,
}, {
'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return (False
if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
else super(PornHubPagedVideoListIE, cls).suitable(url))
return (not any(ph.suitable(url) for ph in (PornHubIE, PornHubUserIE, PornHubUserVideosUploadIE))
and super(PornHubPagedVideoListIE, cls).suitable(url))
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
@@ -762,6 +787,62 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
}]
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
    # Extractor for '/playlist/<id>' URLs: the first page comes from the
    # playlist URL itself, further pages from the 'playlist/viewChunked'
    # endpoint.
    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE
    _TESTS = [{
        'url': 'https://www.pornhub.com/playlist/44121572',
        'info_dict': {
            'id': '44121572',
        },
        'playlist_mincount': 55,
    }, {
        'url': 'https://www.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351?page=2',
        'only_matching': True,
    }]

    def _entries(self, url, host, item_id):
        """Yield the entries of every page of the playlist at `url`.

        The playlist id, item count and token are read from script
        variables in the first page; the id and token are then sent as
        query parameters with each further page request.  Iteration stops
        early at the first page that yields no entries.
        """
        first_page = self._download_webpage(url, item_id, 'Downloading page 1')
        playlist_id = self._search_regex(
            r'var\s+playlistId\s*=\s*"([^"]+)"', first_page, 'playlist_id')
        video_count = int_or_none(self._search_regex(
            r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', first_page, 'video_count'))
        token = self._search_regex(
            r'var\s+token\s*=\s*"([^"]+)"', first_page, 'token')
        # Integer form of ceil((video_count - 36) / 40) + 1; presumably the
        # first page holds 36 items and each chunk 40 -- TODO confirm
        page_count = (video_count - 36 + 39) // 40 + 1

        def fetch_chunk(page_num):
            # Pages after the first are requested from the chunk endpoint
            return self._download_webpage(
                'https://www.{0}/playlist/viewChunked'.format(host),
                item_id,
                'Downloading page {0}'.format(page_num),
                query={
                    'id': playlist_id,
                    'page': page_num,
                    'token': token,
                })

        page_entries = self._extract_entries(first_page, host)
        for page_num in range(1, page_count + 1):
            if page_num != 1:
                # Page 1 was already downloaded and parsed above
                page_entries = self._extract_entries(fetch_chunk(page_num), host)
            if not page_entries:
                return
            for entry in page_entries:
                yield entry

    def _real_extract(self, url):
        """Return the playlist result for the playlist at `url`.

        Only the part of the URL captured by the 'url' group (i.e. without
        any query string) is passed on to `_entries()`.
        """
        match = self._match_valid_url(url)
        host = match.group('host')
        item_id = match.group('id')
        self._login(host)
        self._set_age_cookies(host)
        return self.playlist_result(
            self._entries(match.group('url'), host, item_id), item_id)

View File

@@ -47,7 +47,7 @@ class SenateISVPIE(InfoExtractor):
['vetaff', '76462', 'http://vetaff-f.akamaihd.net'],
['arch', '', 'http://ussenate-f.akamaihd.net/']
]
IE_NAME = 'senate.gov'
_IE_NAME = 'senate.gov'
_VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
_TESTS = [{
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -404,10 +404,6 @@ def parseOpts(overrideArguments=None):
'-F', '--list-formats',
action='store_true', dest='listformats',
help='List all available formats of requested videos')
video_format.add_option(
'--no-list-formats',
action='store_false', dest='listformats',
help='Do not list available formats of requested videos (default)')
video_format.add_option(
'--youtube-include-dash-manifest',
action='store_true', dest='youtube_include_dash_manifest', default=True,
@@ -416,17 +412,6 @@ def parseOpts(overrideArguments=None):
'--youtube-skip-dash-manifest',
action='store_false', dest='youtube_include_dash_manifest',
help='Do not download the DASH manifests and related data on YouTube videos')
video_format.add_option(
'--youtube-player-js-variant',
action='store', dest='youtube_player_js_variant',
help='For YouTube, the player javascript variant to use for n/sig deciphering; `actual` to follow the site; default `%default`.',
choices=('actual', 'main', 'tcc', 'tce', 'es5', 'es6', 'tv', 'tv_es6', 'phone', 'tablet'),
default='main', metavar='VARIANT')
video_format.add_option(
'--youtube-player-js-version',
action='store', dest='youtube_player_js_version',
help='For YouTube, the player javascript version to use for n/sig deciphering, specified as `signature_timestamp@hash`, or `actual` to follow the site; default `%default`',
default='20348@0004de42', metavar='STS@HASH')
video_format.add_option(
'--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None,

View File

@@ -4204,16 +4204,12 @@ def lowercase_escape(s):
s)
def escape_rfc3986(s, safe=None):
def escape_rfc3986(s):
"""Escape non-ASCII characters as suggested by RFC 3986"""
if sys.version_info < (3, 0):
s = _encode_compat_str(s, 'utf-8')
if safe is not None:
safe = _encode_compat_str(safe, 'utf-8')
if safe is None:
safe = b"%/;:@&=+$,!~*'()?#[]"
# ensure unicode: after quoting, it can always be converted
return compat_str(compat_urllib_parse.quote(s, safe))
return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
def escape_url(url):

View File

@@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2025.04.07'
__version__ = '2021.12.17'