mirror of https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 22:28:37 +09:00

Compare commits: df-test-cl ... dlp-fifa-b (56 commits)
SHA1:
acb86ebd50
2a926aef87
195f22f679
fc2beab0e7
1a4fbe8462
c2f9be3e63
604762a9f8
47e70fff8b
de39d1281c
27ed77aabb
c4b19a8816
087ddc2371
65ccb0dd4e
a874871801
b7c25959f0
f102e3dc4e
a19855f0f5
ce5d36486e
d25cf62086
502cefa41f
0faa45d6c0
447edc48e6
ee8560d01e
7135277fec
7bbd5b13d4
c91cbf6072
11b284c81f
c94a459a24
6e2626f092
c282e5f8d7
2ced5a7912
82e4eca711
1b1442887e
22127b271c
d35557a75d
9493ffdb8b
7009bb9f31
218c423bc0
55c823634d
4050e10a4c
ed5c44e7b7
0f6422590e
4c6fba3765
d619dd712f
573b13410e
66e58dccc2
556862bc91
a8d5316aaf
fd3f3bebd0
46b8ae2f52
538ec65ba7
b0a60ce203
e52e8b8111
d231b56717
e6a836d54c
deee741fb1
4 .github/workflows/ci.yml (vendored)
@@ -15,12 +15,12 @@ jobs:
         run-tests-ext: [sh]
         include:
         # python 3.2 is only available on windows via setup-python
-        - os: windows-latest
+        - os: windows-2019
           python-version: 3.2
           python-impl: cpython
           ytdl-test-set: core
           run-tests-ext: bat
-        - os: windows-latest
+        - os: windows-2019
           python-version: 3.2
           python-impl: cpython
           ytdl-test-set: download
@@ -128,6 +128,12 @@ def expect_value(self, got, expected, field):
         self.assertTrue(
             contains_str in got,
            'field %s (value: %r) should contain %r' % (field, got, contains_str))
+    elif isinstance(expected, compat_str) and re.match(r'^lambda \w+:', expected):
+        fn = eval(expected)
+        suite = expected.split(':', 1)[1].strip()
+        self.assertTrue(
+            fn(got),
+            'Expected field %s to meet condition %s, but value %r failed ' % (field, suite, got))
     elif isinstance(expected, type):
         self.assertTrue(
             isinstance(got, expected),

@@ -137,7 +143,7 @@ def expect_value(self, got, expected, field):
     elif isinstance(expected, list) and isinstance(got, list):
         self.assertEqual(
             len(expected), len(got),
-            'Expect a list of length %d, but got a list of length %d for field %s' % (
+            'Expected a list of length %d, but got a list of length %d for field %s' % (
                 len(expected), len(got), field))
         for index, (item_got, item_expected) in enumerate(zip(got, expected)):
             type_got = type(item_got)
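The new `elif` branch lets a test definition express an expected value as a string of the form `'lambda x: ...'`, which `expect_value` compiles with `eval` and applies as a predicate. A standalone sketch of that mechanism (the field value and condition here are hypothetical, not from the test suite):

```python
import re

# Hypothetical test-definition entry: a string-encoded predicate.
expected = 'lambda n: 0 < n <= 300'
got = 42

if re.match(r'^lambda \w+:', expected):
    fn = eval(expected)  # build the predicate from the string
    suite = expected.split(':', 1)[1].strip()
    assert fn(got), 'value %r failed condition %s' % (got, suite)
```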
@@ -8,7 +8,7 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text, aes_ecb_encrypt
 from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
 import base64

@@ -58,6 +58,13 @@ class TestAES(unittest.TestCase):
         decrypted = (aes_decrypt_text(encrypted, password, 32))
         self.assertEqual(decrypted, self.secret_msg)

+    def test_ecb_encrypt(self):
+        data = bytes_to_intlist(self.secret_msg)
+        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
+        self.assertEqual(
+            encrypted,
+            b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
+

 if __name__ == '__main__':
     unittest.main()
@@ -3,17 +3,18 @@

 from __future__ import unicode_literals

-import shutil
-
 # Allow direct execution
 import os
 import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

+import shutil
+
 from test.helper import FakeYDL
 from youtube_dl.cache import Cache
+from youtube_dl.utils import version_tuple
+from youtube_dl.version import __version__


 def _is_empty(d):

@@ -54,6 +55,17 @@ class TestCache(unittest.TestCase):
         self.assertFalse(os.path.exists(self.test_dir))
         self.assertEqual(c.load('test_cache', 'k.'), None)

+    def test_cache_validation(self):
+        ydl = FakeYDL({
+            'cachedir': self.test_dir,
+        })
+        c = Cache(ydl)
+        obj = {'x': 1, 'y': ['ä', '\\a', True]}
+        c.store('test_cache', 'k.', obj)
+        self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
+        new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
+        self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
+

 if __name__ == '__main__':
     unittest.main()
@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


 from youtube_dl.compat import (
+    compat_casefold,
     compat_getenv,
     compat_setenv,
     compat_etree_Element,

@@ -118,9 +119,21 @@ class TestCompat(unittest.TestCase):
     <smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
         compat_etree_fromstring(xml)

-    def test_struct_unpack(self):
+    def test_compat_struct_unpack(self):
         self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))

+    def test_compat_casefold(self):
+        if hasattr(compat_str, 'casefold'):
+            # don't bother to test str.casefold() (again)
+            return
+        # thanks https://bugs.python.org/file24232/casefolding.patch
+        self.assertEqual(compat_casefold('hello'), 'hello')
+        self.assertEqual(compat_casefold('hELlo'), 'hello')
+        self.assertEqual(compat_casefold('ß'), 'ss')
+        self.assertEqual(compat_casefold('ﬁ'), 'fi')
+        self.assertEqual(compat_casefold('\u03a3'), '\u03c3')
+        self.assertEqual(compat_casefold('A\u0345\u03a3'), 'a\u03b9\u03c3')
+

 if __name__ == '__main__':
     unittest.main()
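For context, the pattern under test: Python 3 strings already provide `casefold`, so `compat_casefold` only needs the table-driven fallback (the new `youtube_dl/casefold.py`) on Python 2. A minimal sketch of that dispatch, assuming only the standard library:

```python
try:
    casefold = str.casefold  # Python 3: built-in full Unicode case folding
except AttributeError:
    casefold = None  # Python 2 would fall back to a table-driven casefold()

if casefold:
    print(casefold('ß'), casefold('\u03a3'))  # -> ss σ
```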
@@ -33,6 +33,7 @@ from youtube_dl.compat import (
 from youtube_dl.utils import (
     DownloadError,
     ExtractorError,
+    error_to_compat_str,
     format_bytes,
     UnavailableVideoError,
 )

@@ -108,7 +109,7 @@ def generator(test_case, tname):
     for tc in test_cases:
         info_dict = tc.get('info_dict', {})
         if not (info_dict.get('id') and info_dict.get('ext')):
-            raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')
+            raise Exception('Test definition (%s) requires both \'id\' and \'ext\' keys present to define the output file' % (tname, ))

     if 'skip' in test_case:
         print_skipping(test_case['skip'])

@@ -161,7 +162,9 @@ def generator(test_case, tname):
         except (DownloadError, ExtractorError) as err:
             # Check if the exception is not a network related one
             if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
-                raise
+                msg = getattr(err, 'msg', error_to_compat_str(err))
+                err.msg = '%s (%s)' % (msg, tname, )
+                raise err

         if try_num == RETRIES:
             report_warning('%s failed due to network errors, skipping...' % tname)
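Instead of a bare `raise`, the error path now stamps the generated test's name onto the exception message before re-raising. A standalone sketch of that annotate-and-reraise shape (the names here are hypothetical):

```python
def reraise_with_test_name(err, tname):
    # extend the message so CI logs identify which generated test failed
    msg = getattr(err, 'msg', str(err))
    err.msg = '%s (%s)' % (msg, tname)
    raise err

try:
    try:
        raise ValueError('boom')
    except ValueError as err:
        reraise_with_test_name(err, 'test_Example')
except ValueError as annotated:
    print(annotated.msg)  # -> boom (test_Example)
```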
@@ -8,7 +8,12 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from youtube_dl.jsinterp import JSInterpreter
+import math
+import re
+
+from youtube_dl.compat import compat_re_Pattern
+
+from youtube_dl.jsinterp import JS_Undefined, JSInterpreter


 class TestJSInterpreter(unittest.TestCase):

@@ -19,6 +24,9 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function x3(){return 42;}')
         self.assertEqual(jsi.call_function('x3'), 42)

+        jsi = JSInterpreter('function x3(){42}')
+        self.assertEqual(jsi.call_function('x3'), None)
+
         jsi = JSInterpreter('var x5 = function(){return 42;}')
         self.assertEqual(jsi.call_function('x5'), 42)

@@ -45,14 +53,32 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return 1 << 5;}')
         self.assertEqual(jsi.call_function('f'), 32)

+        jsi = JSInterpreter('function f(){return 2 ** 5}')
+        self.assertEqual(jsi.call_function('f'), 32)
+
         jsi = JSInterpreter('function f(){return 19 & 21;}')
         self.assertEqual(jsi.call_function('f'), 17)

         jsi = JSInterpreter('function f(){return 11 >> 2;}')
         self.assertEqual(jsi.call_function('f'), 2)

+        jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
+        self.assertEqual(jsi.call_function('f'), 5)
+
+        jsi = JSInterpreter('function f(){return 1 == 2}')
+        self.assertEqual(jsi.call_function('f'), False)
+
+        jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
+        self.assertEqual(jsi.call_function('f'), 2)
+
+        jsi = JSInterpreter('function f(){return 0 ?? 42;}')
+        self.assertEqual(jsi.call_function('f'), 0)
+
+        jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
+        self.assertFalse(jsi.call_function('f'))
+
     def test_array_access(self):
-        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}')
+        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
         self.assertEqual(jsi.call_function('f'), [5, 2, 7])

     def test_parens(self):

@@ -62,6 +88,10 @@ class TestJSInterpreter(unittest.TestCase):
         jsi = JSInterpreter('function f(){return (1 + 2) * 3;}')
         self.assertEqual(jsi.call_function('f'), 9)

+    def test_quotes(self):
+        jsi = JSInterpreter(r'function f(){return "a\"\\("}')
+        self.assertEqual(jsi.call_function('f'), r'a"\(')
+
     def test_assignments(self):
         jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}')
         self.assertEqual(jsi.call_function('f'), 31)

@@ -104,18 +134,34 @@ class TestJSInterpreter(unittest.TestCase):
         }''')
         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])

+    def test_builtins(self):
+        jsi = JSInterpreter('''
+        function x() { return NaN }
+        ''')
+        self.assertTrue(math.isnan(jsi.call_function('x')))
+
+        jsi = JSInterpreter('''
+        function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+        jsi = JSInterpreter('''
+        function x(dt) { return new Date(dt) - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
-        function y(a) { return x() + a; }
+        function y(a) { return x() + (a?a:0); }
         function z() { return y(3); }
         ''')
         self.assertEqual(jsi.call_function('z'), 5)
+        self.assertEqual(jsi.call_function('y'), 2)

     def test_for_loop(self):
+        # function x() { a=0; for (i=0; i-10; i++) {a++} a }
         jsi = JSInterpreter('''
-        function x() { a=0; for (i=0; i-10; i = i + 1) {a++} a }
+        function x() { a=0; for (i=0; i-10; i++) {a++} return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 10)

@@ -154,21 +200,53 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x'), 10)

     def test_catch(self):
         jsi = JSInterpreter('''
         function x() { try{throw 10} catch(e){return 5} }
         ''')
         self.assertEqual(jsi.call_function('x'), 5)

+    def test_finally(self):
+        jsi = JSInterpreter('''
+        function x() { try{throw 10} finally {return 42} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+        jsi = JSInterpreter('''
+        function x() { try{throw 10} catch(e){return 5} finally {return 42} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+
+    def test_nested_try(self):
+        jsi = JSInterpreter('''
+        function x() {try {
+            try{throw 10} finally {throw 42}
+            } catch(e){return 5} }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+
     def test_for_loop_continue(self):
         jsi = JSInterpreter('''
-        function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a }
+        function x() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 0)

     def test_for_loop_break(self):
         jsi = JSInterpreter('''
-        function x() { a=0; for (i=0; i-10; i++) { break; a++ } a }
+        function x() { a=0; for (i=0; i-10; i++) { break; a++ } return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 0)

+    def test_for_loop_try(self):
+        jsi = JSInterpreter('''
+        function x() {
+            for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
+            return 42 }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 42)
+
     def test_literal_list(self):
         jsi = JSInterpreter('''
-        function x() { [1, 2, "asdf", [5, 6, 7]][3] }
+        function x() { return [1, 2, "asdf", [5, 6, 7]][3] }
         ''')
         self.assertEqual(jsi.call_function('x'), [5, 6, 7])

@@ -177,6 +255,156 @@ class TestJSInterpreter(unittest.TestCase):
         function x() { a=5; a -= 1, a+=3; return a }
         ''')
         self.assertEqual(jsi.call_function('x'), 7)
+        jsi = JSInterpreter('''
+        function x() { a=5; return (a -= 1, a+=3, a); }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 7)
+
+        jsi = JSInterpreter('''
+        function x() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 5)
+
+    def test_void(self):
+        jsi = JSInterpreter('''
+        function x() { return void 42; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), None)
+
+    def test_return_function(self):
+        jsi = JSInterpreter('''
+        function x() { return [1, function(){return 1}][1] }
+        ''')
+        self.assertEqual(jsi.call_function('x')([]), 1)
+
+    def test_null(self):
+        jsi = JSInterpreter('''
+        function x() { return null; }
+        ''')
+        self.assertIs(jsi.call_function('x'), None)
+
+        jsi = JSInterpreter('''
+        function x() { return [null > 0, null < 0, null == 0, null === 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [null >= 0, null <= 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [True, True])
+
+    def test_undefined(self):
+        jsi = JSInterpreter('''
+        function x() { return undefined === undefined; }
+        ''')
+        self.assertTrue(jsi.call_function('x'))
+
+        jsi = JSInterpreter('''
+        function x() { return undefined; }
+        ''')
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
+
+        jsi = JSInterpreter('''
+        function x() { let v; return v; }
+        ''')
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [True, True, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined === 0, undefined == 0, undefined < 0, undefined > 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined >= 0, undefined <= 0]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined > null, undefined < null, undefined == null, undefined === null]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, True, False])
+
+        jsi = JSInterpreter('''
+        function x() { return [undefined === null, undefined == null, undefined < null, undefined > null]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, True, False, False])
+
+        jsi = JSInterpreter('''
+        function x() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; }
+        ''')
+        for y in jsi.call_function('x'):
+            self.assertTrue(math.isnan(y))
+
+        jsi = JSInterpreter('''
+        function x() { let v; return v**0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 1)
+
+        jsi = JSInterpreter('''
+        function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined])
+
+        jsi = JSInterpreter('function x(){return undefined ?? 42; }')
+        self.assertEqual(jsi.call_function('x'), 42)
+
+    def test_object(self):
+        jsi = JSInterpreter('''
+        function x() { return {}; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), {})
+
+        jsi = JSInterpreter('''
+        function x() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), [42, 0])
+
+        jsi = JSInterpreter('''
+        function x() { let a; return a?.qq; }
+        ''')
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
+
+        jsi = JSInterpreter('''
+        function x() { let a = {m1: 42, m2: 0 }; return a?.qq; }
+        ''')
+        self.assertIs(jsi.call_function('x'), JS_Undefined)
+
+    def test_regex(self):
+        jsi = JSInterpreter('''
+        function x() { let a=/,,[/,913,/](,)}/; }
+        ''')
+        self.assertIs(jsi.call_function('x'), None)
+
+        jsi = JSInterpreter('''
+        function x() { let a=/,,[/,913,/](,)}/; return a; }
+        ''')
+        self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern)
+
+        jsi = JSInterpreter('''
+        function x() { let a=/,,[/,913,/](,)}/i; return a; }
+        ''')
+        self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
+
+    def test_char_code_at(self):
+        jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
+        self.assertEqual(jsi.call_function('x', 0), 116)
+        self.assertEqual(jsi.call_function('x', 1), 101)
+        self.assertEqual(jsi.call_function('x', 2), 115)
+        self.assertEqual(jsi.call_function('x', 3), 116)
+        self.assertEqual(jsi.call_function('x', 4), None)
+        self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
+
+    def test_bitwise_operators_overflow(self):
+        jsi = JSInterpreter('function x(){return -524999584 << 5}')
+        self.assertEqual(jsi.call_function('x'), 379882496)
+
+        jsi = JSInterpreter('function x(){return 1236566549 << 5}')
+        self.assertEqual(jsi.call_function('x'), 915423904)
+

 if __name__ == '__main__':
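All of the cases above share one driver pattern: build a `JSInterpreter` over a JS snippet, then invoke a named function. A minimal usage sketch (assuming `youtube_dl` is on the path):

```python
from youtube_dl.jsinterp import JSInterpreter

jsi = JSInterpreter('function f(a, b){ return a + b; }')
print(jsi.call_function('f', 19, 23))  # -> 42
```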
@@ -12,7 +12,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 # Various small unit tests
 import io
+import itertools
 import json
+import re
 import xml.etree.ElementTree

 from youtube_dl.utils import (

@@ -40,11 +42,14 @@ from youtube_dl.utils import (
     get_element_by_attribute,
     get_elements_by_class,
     get_elements_by_attribute,
+    get_first,
     InAdvancePagedList,
     int_or_none,
     intlist_to_bytes,
     is_html,
+    join_nonempty,
     js_to_json,
+    LazyList,
     limit_length,
     merge_dicts,
     mimetype2ext,

@@ -79,6 +84,8 @@ from youtube_dl.utils import (
     strip_or_none,
     subtitles_filename,
     timeconvert,
+    traverse_obj,
+    try_call,
     unescapeHTML,
     unified_strdate,
     unified_timestamp,

@@ -92,6 +99,7 @@ from youtube_dl.utils import (
     urlencode_postdata,
     urshift,
     update_url_query,
+    variadic,
     version_tuple,
     xpath_with_ns,
     xpath_element,

@@ -112,12 +120,18 @@ from youtube_dl.compat import (
     compat_getenv,
     compat_os_name,
     compat_setenv,
+    compat_str,
     compat_urlparse,
     compat_parse_qs,
 )


 class TestUtil(unittest.TestCase):
+
+    # yt-dlp shim
+    def assertCountEqual(self, expected, got, msg='count should be the same'):
+        return self.assertEqual(len(tuple(expected)), len(tuple(got)), msg=msg)
+
     def test_timeconvert(self):
         self.assertTrue(timeconvert('') is None)
         self.assertTrue(timeconvert('bougrg') is None)

@@ -370,6 +384,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
         self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
         self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+        self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
+        self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
+        self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)

     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')

@@ -1475,6 +1492,315 @@ Line 1
         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')

+    def test_LazyList(self):
+        it = list(range(10))
+
+        self.assertEqual(list(LazyList(it)), it)
+        self.assertEqual(LazyList(it).exhaust(), it)
+        self.assertEqual(LazyList(it)[5], it[5])
+
+        self.assertEqual(LazyList(it)[5:], it[5:])
+        self.assertEqual(LazyList(it)[:5], it[:5])
+        self.assertEqual(LazyList(it)[::2], it[::2])
+        self.assertEqual(LazyList(it)[1::2], it[1::2])
+        self.assertEqual(LazyList(it)[5::-1], it[5::-1])
+        self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2])
+        self.assertEqual(LazyList(it)[::-1], it[::-1])
+
+        self.assertTrue(LazyList(it))
+        self.assertFalse(LazyList(range(0)))
+        self.assertEqual(len(LazyList(it)), len(it))
+        self.assertEqual(repr(LazyList(it)), repr(it))
+        self.assertEqual(compat_str(LazyList(it)), compat_str(it))
+
+        self.assertEqual(list(LazyList(it, reverse=True)), it[::-1])
+        self.assertEqual(list(reversed(LazyList(it))[::-1]), it)
+        self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7])
+
+    def test_LazyList_laziness(self):
+
+        def test(ll, idx, val, cache):
+            self.assertEqual(ll[idx], val)
+            self.assertEqual(ll._cache, list(cache))
+
+        ll = LazyList(range(10))
+        test(ll, 0, 0, range(1))
+        test(ll, 5, 5, range(6))
+        test(ll, -3, 7, range(10))
+
+        ll = LazyList(range(10), reverse=True)
+        test(ll, -1, 0, range(1))
+        test(ll, 3, 6, range(10))
+
+        ll = LazyList(itertools.count())
+        test(ll, 10, 10, range(11))
+        ll = reversed(ll)
+        test(ll, -15, 14, range(15))
+
+    def test_try_call(self):
+        def total(*x, **kwargs):
+            return sum(x) + sum(kwargs.values())
+
+        self.assertEqual(try_call(None), None,
+                         msg='not a fn should give None')
+        self.assertEqual(try_call(lambda: 1), 1,
+                         msg='int fn with no expected_type should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=int), 1,
+                         msg='int fn with expected_type int should give int')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with wrong expected_type should give None')
+        self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1,
+                         msg='fn should accept arglist')
+        self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1,
+                         msg='fn should accept kwargs')
+        self.assertEqual(try_call(lambda: 1, expected_type=dict), None,
+                         msg='int fn with no expected_type should give None')
+        self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42,
+                         msg='expect first int result with expected_type int')
+
+    def test_variadic(self):
+        self.assertEqual(variadic(None), (None, ))
+        self.assertEqual(variadic('spam'), ('spam', ))
+        self.assertEqual(variadic('spam', allowed_types=dict), 'spam')
+
+    def test_traverse_obj(self):
+        _TEST_DATA = {
+            100: 100,
+            1.2: 1.2,
+            'str': 'str',
+            'None': None,
+            '...': Ellipsis,
+            'urls': [
+                {'index': 0, 'url': 'https://www.example.com/0'},
+                {'index': 1, 'url': 'https://www.example.com/1'},
+            ],
+            'data': (
+                {'index': 2},
+                {'index': 3},
+            ),
+            'dict': {},
+        }
+
+        # Test base functionality
+        self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
+                         msg='allow tuple path')
+        self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
+                         msg='allow list path')
+        self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
+                         msg='allow iterable path')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
+                         msg='single items should be treated as a path')
+        self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
+        self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
+        self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
+
+        # Test Ellipsis behavior
+        self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
+                              (item for item in _TEST_DATA.values() if item is not None),
+                              msg='`...` should give all values except `None`')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
+                              msg='`...` selection for dicts should select all values')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
+                         ['https://www.example.com/0', 'https://www.example.com/1'],
+                         msg='nested `...` queries should work')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
+                              msg='`...` query result should be flattened')
+
+        # Test function as key
+        self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
+                         [_TEST_DATA['urls']],
+                         msg='function as query key should perform a filter based on (key, value)')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], compat_str)), {'str'},
+                              msg='exceptions in the query function should be caught')
+
+        # Test alternative paths
+        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
+                         msg='multiple `paths` should be treated as alternative paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
+                         msg='alternatives should exit early')
+        self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
+                         msg='alternatives should return `default` if exhausted')
+        self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, 'fail'), 100), 100,
+                         msg='alternatives should track their own branching return')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('dict', Ellipsis), ('data', Ellipsis)), list(_TEST_DATA['data']),
+                         msg='alternatives on empty objects should search further')
+
+        # Test branch and path nesting
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
+                         msg='tuple as key should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
+                         msg='list as key should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
+                         msg='double nesting in path should be treated as paths')
+        self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
+                         msg='do not fail early on branching')
+        self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
+                              ['https://www.example.com/0', 'https://www.example.com/1'],
+                              msg='triple nesting in path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (Ellipsis, 'url')))),
+                         ['https://www.example.com/0', 'https://www.example.com/1'],
+                         msg='ellipsis as branch path start gets flattened')
+
+        # Test dictionary as key
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
+                         msg='dict key should result in a dict with the same keys')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
+                         {0: 'https://www.example.com/0'},
+                         msg='dict key should allow paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
+                         {0: ['https://www.example.com/0']},
+                         msg='tuple in dict path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
+                         {0: ['https://www.example.com/0']},
+                         msg='double nesting in dict path should be treated as paths')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
+                         {0: ['https://www.example.com/1', 'https://www.example.com/0']},
+                         msg='triple nesting in dict path should be treated as branches')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
+                         msg='remove `None` values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=Ellipsis), {0: Ellipsis},
+                         msg='do not remove `None` values if `default`')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {0: {}},
+                         msg='do not remove empty values when dict key')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: {}},
+                         msg='do not remove empty values when dict key and a default')
+        self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', Ellipsis)}), {0: []},
+                         msg='if branch in dict key not successful, return `[]`')
+
+        # Testing default parameter behavior
+        _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
+                         msg='default value should be `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=Ellipsis), Ellipsis,
+                         msg='chained fails should result in default')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
+                         msg='should not short cirquit on `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
+                         msg='invalid dict key should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
+                         msg='`None` is a deliberate sentinel and should become `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
+                         msg='`IndexError` should result in `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=1), 1,
+                         msg='if branched but not successful return `default` if defined, not `[]`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail'), default=None), None,
+                         msg='if branched but not successful return `default` even if `default` is `None`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, (Ellipsis, 'fail')), [],
+                         msg='if branched but not successful return `[]`, not `default`')
+        self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', Ellipsis)), [],
+                         msg='if branched but object is empty return `[]`, not `default`')
+
+        # Testing expected_type behavior
+        _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=compat_str), 'str',
+                         msg='accept matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), None,
+                         msg='reject non matching `expected_type` type')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: compat_str(x)), '0',
+                         msg='transform type using type function')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str',
+                                      expected_type=lambda _: 1 / 0), None,
+                         msg='wrap expected_type function in try_call')
+        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, Ellipsis, expected_type=compat_str), ['str'],
+                         msg='eliminate items that expected_type fails on')
+
+        # Test get_all behavior
+        _GET_ALL_DATA = {'key': [0, 1, 2]}
+        self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', Ellipsis), get_all=False), 0,
+                         msg='if not `get_all`, return only first matching value')
+        self.assertEqual(traverse_obj(_GET_ALL_DATA, Ellipsis, get_all=False), [0, 1, 2],
+                         msg='do not overflatten if not `get_all`')
+
+        # Test casesense behavior
+        _CASESENSE_DATA = {
+            'KeY': 'value0',
+            0: {
+                'KeY': 'value1',
+                0: {'KeY': 'value2'},
+            },
+            # FULLWIDTH LATIN CAPITAL LETTER K
+            '\uff2bey': 'value3',
+        }
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
+                         msg='dict keys should be case sensitive unless `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
+                                      casesense=False), 'value0',
+                         msg='allow non matching key case if `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, '\uff4bey',  # FULLWIDTH LATIN SMALL LETTER K
+                                      casesense=False), 'value3',
+                         msg='allow non matching Unicode key case if `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
+                                      casesense=False), ['value1'],
+                         msg='allow non matching key case in branch if `casesense`')
+        self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
+                                      casesense=False), ['value2'],
+                         msg='allow non matching key case in branch path if `casesense`')
+
+        # Test traverse_string behavior
+        _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
+                         msg='do not traverse into string if not `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
+                                      _traverse_string=True), 's',
+                         msg='traverse into string if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
+                                      _traverse_string=True), '.',
+                         msg='traverse into converted data if `traverse_string`')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', Ellipsis),
+                                      _traverse_string=True), list('str'),
+                         msg='`...` branching into string should result in list')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
+                                      _traverse_string=True), ['s', 'r'],
+                         msg='branching into string should result in list')
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda _, x: x),
+                                      _traverse_string=True), list('str'),
+                         msg='function branching into string should result in list')
+
+        # Test is_user_input behavior
+        _IS_USER_INPUT_DATA = {'range8': list(range(8))}
+        self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
+                                      _is_user_input=True), 3,
+                         msg='allow for string indexing if `is_user_input`')
+        self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
+                                           _is_user_input=True), tuple(range(8))[3:],
+                              msg='allow for string slice if `is_user_input`')
+        self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
+                                           _is_user_input=True), tuple(range(8))[:4:2],
+                              msg='allow step in string slice if `is_user_input`')
+        self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
+                                           _is_user_input=True), range(8),
+                              msg='`:` should be treated as `...` if `is_user_input`')
+        with self.assertRaises(TypeError, msg='too many params should result in error'):
+            traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), _is_user_input=True)
+
+        # Test re.Match as input obj
+        mobj = re.match(r'^0(12)(?P<group>3)(4)?$', '0123')
+        self.assertEqual(traverse_obj(mobj, Ellipsis), [x for x in mobj.groups() if x is not None],
+                         msg='`...` on a `re.Match` should give its `groups()`')
+        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
+                         msg='function on a `re.Match` should give groupno, value starting at 0')
+        self.assertEqual(traverse_obj(mobj, 'group'), '3',
+                         msg='str key on a `re.Match` should give group with that name')
+        self.assertEqual(traverse_obj(mobj, 2), '3',
+                         msg='int key on a `re.Match` should give group with that name')
+        self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
+                         msg='str key on a `re.Match` should respect casesense')
+        self.assertEqual(traverse_obj(mobj, 'fail'), None,
+                         msg='failing str key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
+                         msg='failing str key on a `re.Match` should return `default`')
+        self.assertEqual(traverse_obj(mobj, 8), None,
+                         msg='failing int key on a `re.Match` should return `default`')
+
+    def test_get_first(self):
+        self.assertEqual(get_first([{'a': None}, {'a': 'spam'}], 'a'), 'spam')
+
+    def test_join_nonempty(self):
+        self.assertEqual(join_nonempty('a', 'b'), 'a-b')
+        self.assertEqual(join_nonempty(
+            'a', 'b', 'c', 'd',
+            from_dict={'a': 'c', 'c': [], 'b': 'd', 'd': None}), 'c-d')
+

 if __name__ == '__main__':
     unittest.main()
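`traverse_obj` is the workhorse being backported here: it walks nested dicts and sequences along a path, with `Ellipsis` branching over all values and `default` covering failed lookups. A short usage sketch grounded in the cases above:

```python
from youtube_dl.utils import traverse_obj

data = {'urls': [{'index': 0, 'url': 'https://www.example.com/0'},
                 {'index': 1, 'url': 'https://www.example.com/1'}]}

print(traverse_obj(data, ('urls', 0, 'url')))         # -> first URL
print(traverse_obj(data, ('urls', Ellipsis, 'url')))  # -> both URLs (branching)
print(traverse_obj(data, 'missing', default='n/a'))   # -> 'n/a'
```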
@@ -12,10 +12,11 @@ import io
 import re
 import string

-from youtube_dl.compat import compat_str, compat_urlretrieve
-
 from test.helper import FakeYDL
 from youtube_dl.extractor import YoutubeIE
 from youtube_dl.jsinterp import JSInterpreter
+from youtube_dl.compat import compat_str, compat_urlretrieve
+

 _SIG_TESTS = [
     (

@@ -90,12 +91,57 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
         'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
     ),
+    (
+        'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+        'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+    ),
+    (
+        'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js',
+        'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA',
+    ),
+    (
+        'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js',
+        'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw',
+    ),
+    (
+        'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js',
+        'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg',
+    ),
+    (
+        'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js',
+        'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
+    ),
+    (
+        'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
+        '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
+    ),
+    (
+        'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
+        '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
+    ),
+    (
+        'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
+        '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
+    ),
+    (
+        'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
+        'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
+    ),
+    (
+        'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
+        '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
+    ),
+    (
+        'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js',
+        'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ',
+    ),
 ]


 class TestPlayerInfo(unittest.TestCase):
     def test_youtube_extract_player_info(self):
         PLAYER_URLS = (
             ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
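Each `_NSIG_TESTS` entry pairs a player URL with an input/output `n`-parameter pair, while `TestPlayerInfo` only checks that the 8-hex-digit player id is recovered from the URL. A sketch of that id extraction; the regex here is an assumption for illustration, not the extractor's exact pattern:

```python
import re

def extract_player_id(player_url):
    # hypothetical simplification of the player-id extraction under test
    m = re.search(r'/s/player/([0-9a-f]{8})/', player_url)
    return m and m.group(1)

print(extract_player_id(
    'https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js'))
# -> 64dddad9
```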
@@ -721,7 +721,7 @@ class YoutubeDL(object):
                 filename = encodeFilename(filename, True).decode(preferredencoding())
             return sanitize_path(filename)
         except ValueError as err:
-            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+            self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
             return None

     def _match_entry(self, info_dict, incomplete):

@@ -1570,9 +1570,6 @@ class YoutubeDL(object):
         else:
             formats = info_dict['formats']

-        if not formats:
-            raise ExtractorError('No video formats found!')
-
         def is_wellformed(f):
             url = f.get('url')
             if not url:

@@ -1585,7 +1582,10 @@ class YoutubeDL(object):
             return True

         # Filter out malformed formats for better extraction robustness
-        formats = list(filter(is_wellformed, formats))
+        formats = list(filter(is_wellformed, formats or []))
+
+        if not formats:
+            raise ExtractorError('No video formats found!')

         formats_dict = {}

@@ -1779,10 +1779,9 @@ class YoutubeDL(object):

         assert info_dict.get('_type', 'video') == 'video'

-        max_downloads = self.params.get('max_downloads')
-        if max_downloads is not None:
-            if self._num_downloads >= int(max_downloads):
-                raise MaxDownloadsReached()
+        max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
+        if self._num_downloads >= max_downloads:
+            raise MaxDownloadsReached()

         # TODO: backward compatibility, to be removed
         info_dict['fulltitle'] = info_dict['title']

@@ -2059,9 +2058,12 @@ class YoutubeDL(object):
         try:
             self.post_process(filename, info_dict)
         except (PostProcessingError) as err:
-            self.report_error('postprocessing: %s' % str(err))
+            self.report_error('postprocessing: %s' % error_to_compat_str(err))
             return
         self.record_download_archive(info_dict)
+        # avoid possible nugatory search for further items (PR #26638)
+        if self._num_downloads >= max_downloads:
+            raise MaxDownloadsReached()

     def download(self, url_list):
         """Download a given list of URLs."""
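The rewritten guard collapses the three-line `None`/`int` dance into one expression: `int_or_none()` returns `None` for a missing or malformed setting, and `or float('inf')` turns that into a limit that never trips. A standalone sketch of the idiom, with a simplified stand-in for `youtube_dl.utils.int_or_none`:

```python
def int_or_none(v):
    # simplified stand-in for youtube_dl.utils.int_or_none
    try:
        return int(v)
    except (TypeError, ValueError):
        return None

for setting in (None, '3', 'bogus'):
    max_downloads = int_or_none(setting) or float('inf')
    print(setting, '->', 5 >= max_downloads)  # limit reached after 5 downloads?
```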
@@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes
 BLOCK_SIZE_BYTES = 16


+def pkcs7_padding(data):
+    """
+    PKCS#7 padding
+
+    @param {int[]} data        cleartext
+    @returns {int[]}           padding data
+    """
+
+    remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
+    return data + [remaining_length] * remaining_length
+
+
 def aes_ctr_decrypt(data, key, counter):
     """
     Decrypt with aes in counter mode

@@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
     previous_cipher_block = iv
     for i in range(block_count):
         block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
-        remaining_length = BLOCK_SIZE_BYTES - len(block)
-        block += [remaining_length] * remaining_length
+        block = pkcs7_padding(block)
         mixed_block = xor(block, previous_cipher_block)

         encrypted_block = aes_encrypt(mixed_block, expanded_key)

@@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
     return encrypted_data


+def aes_ecb_encrypt(data, key):
+    """
+    Encrypt with aes in ECB mode. Using PKCS#7 padding
+
+    @param {int[]} data        cleartext
+    @param {int[]} key         16/24/32-Byte cipher key
+    @returns {int[]}           encrypted data
+    """
+    expanded_key = key_expansion(key)
+    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+    encrypted_data = []
+    for i in range(block_count):
+        block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+        block = pkcs7_padding(block)
+
+        encrypted_block = aes_encrypt(block, expanded_key)
+        encrypted_data += encrypted_block
+
+    return encrypted_data
+
+
 def key_expansion(data):
     """
     Generate key schedule

@@ -303,7 +336,7 @@ def xor(data1, data2):


 def rijndael_mul(a, b):
-    if(a == 0 or b == 0):
+    if (a == 0 or b == 0):
         return 0
     return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
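A round-trip sketch of the two helpers added above, using the module's int-list convention (one list element per byte); this assumes `youtube_dl` is importable and mirrors what `test_ecb_encrypt` exercises:

```python
from youtube_dl.aes import aes_ecb_encrypt, pkcs7_padding
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes

key = bytes_to_intlist(b'0123456789abcdef')    # 16-byte AES-128 key
data = bytes_to_intlist(b'Secret message!')    # 15 bytes -> one padding byte

print(pkcs7_padding(list(data))[-1])           # -> 1 (the PKCS#7 pad byte)
print(len(intlist_to_bytes(aes_ecb_encrypt(data, key))))  # -> 16 (one block)
```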
@@ -10,12 +10,21 @@ import traceback

 from .compat import compat_getenv
 from .utils import (
+    error_to_compat_str,
     expand_path,
+    is_outdated_version,
+    try_get,
     write_json_file,
 )
+from .version import __version__


 class Cache(object):
+
+    _YTDL_DIR = 'youtube-dl'
+    _VERSION_KEY = _YTDL_DIR + '_version'
+    _DEFAULT_VERSION = '2021.12.17'
+
     def __init__(self, ydl):
         self._ydl = ydl

@@ -23,7 +32,7 @@ class Cache(object):
         res = self._ydl.params.get('cachedir')
         if res is None:
             cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
-            res = os.path.join(cache_root, 'youtube-dl')
+            res = os.path.join(cache_root, self._YTDL_DIR)
         return expand_path(res)

     def _get_cache_fn(self, section, key, dtype):

@@ -50,13 +59,22 @@ class Cache(object):
             except OSError as ose:
                 if ose.errno != errno.EEXIST:
                     raise
-            write_json_file(data, fn)
+            write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
         except Exception:
             tb = traceback.format_exc()
             self._ydl.report_warning(
                 'Writing cache to %r failed: %s' % (fn, tb))

-    def load(self, section, key, dtype='json', default=None):
+    def _validate(self, data, min_ver):
+        version = try_get(data, lambda x: x[self._VERSION_KEY])
+        if not version:  # Backward compatibility
+            data, version = {'data': data}, self._DEFAULT_VERSION
+        if not is_outdated_version(version, min_ver or '0', assume_new=False):
+            return data['data']
+        self._ydl.to_screen(
+            'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
+
+    def load(self, section, key, dtype='json', default=None, min_ver=None):
         assert dtype in ('json',)

         if not self.enabled:

@@ -66,12 +84,12 @@ class Cache(object):
         try:
             try:
                 with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
-                    return json.load(cachef)
+                    return self._validate(json.load(cachef), min_ver)
             except ValueError:
                 try:
                     file_size = os.path.getsize(cache_fn)
                 except (OSError, IOError) as oe:
-                    file_size = str(oe)
+                    file_size = error_to_compat_str(oe)
                 self._ydl.report_warning(
                     'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
         except IOError:
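The store/load pair now wraps payloads in a version-stamped record, and `_validate` discards records older than `min_ver`. A simplified, standalone sketch of the record shape and gating logic (the comparison here is a stand-in, not the real `is_outdated_version`):

```python
def is_outdated_version(version, limit):
    # simplified stand-in for youtube_dl.utils.is_outdated_version
    as_tuple = lambda v: tuple(int(x) for x in v.split('.'))
    return as_tuple(version) < as_tuple(limit)

record = {'youtube-dl_version': '2021.12.17', 'data': {'player': 'e06dea74'}}

def load(record, min_ver=None):
    version = record.get('youtube-dl_version') or '2021.12.17'
    if not is_outdated_version(version, min_ver or '0'):
        return record['data']  # fresh enough: return the payload
    return None                # stale: caller falls back to `default`

print(load(record, min_ver='1970.01.01'))  # -> {'player': 'e06dea74'}
print(load(record, min_ver='2999.01.01'))  # -> None
```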
1667 youtube_dl/casefold.py (new file; diff suppressed because it is too large)
@@ -21,6 +21,19 @@ import subprocess
 import sys
 import xml.etree.ElementTree

+# deal with critical unicode/str things first
+try:
+    # Python 2
+    compat_str, compat_basestring, compat_chr = (
+        unicode, basestring, unichr
+    )
+    from .casefold import casefold as compat_casefold
+except NameError:
+    compat_str, compat_basestring, compat_chr = (
+        str, str, chr
+    )
+    compat_casefold = lambda s: s.casefold()
+
 try:
     import collections.abc as compat_collections_abc
 except ImportError:

@@ -2373,11 +2386,6 @@ try:
 except ImportError:
     import BaseHTTPServer as compat_http_server

-try:
-    compat_str = unicode  # Python 2
-except NameError:
-    compat_str = str
-
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote

@@ -2508,22 +2516,11 @@ except ImportError:  # Python < 3.4

         return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)

-try:
-    compat_basestring = basestring  # Python 2
-except NameError:
-    compat_basestring = str
-
-try:
-    compat_chr = unichr  # Python 2
-except NameError:
-    compat_chr = chr
-
 try:
     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error


 etree = xml.etree.ElementTree

@@ -2985,7 +2982,6 @@ except ImportError:
 except ImportError:
     compat_filter = filter

-
 try:
     from future_builtins import zip as compat_zip
 except ImportError:  # not 2.6+ or is 3.x

@@ -2995,6 +2991,82 @@ except ImportError:  # not 2.6+ or is 3.x
     compat_zip = zip


+# method renamed between Py2/3
+try:
+    from itertools import zip_longest as compat_itertools_zip_longest
+except ImportError:
+    from itertools import izip_longest as compat_itertools_zip_longest
+
+
+# new class in collections
+try:
+    from collections import ChainMap as compat_collections_chain_map
+    # Py3.3's ChainMap is deficient
+    if sys.version_info < (3, 4):
+        raise ImportError
+except ImportError:
+    # Py <= 3.3
+    class compat_collections_chain_map(compat_collections_abc.MutableMapping):
+
+        maps = [{}]
+
+        def __init__(self, *maps):
+            self.maps = list(maps) or [{}]
+
+        def __getitem__(self, k):
+            for m in self.maps:
+                if k in m:
+                    return m[k]
+            raise KeyError(k)
+
+        def __setitem__(self, k, v):
+            self.maps[0].__setitem__(k, v)
+            return
+
+        def __contains__(self, k):
+            return any((k in m) for m in self.maps)
+
+        def __delitem(self, k):
+            if k in self.maps[0]:
+                del self.maps[0][k]
+                return
+            raise KeyError(k)
+
+        def __delitem__(self, k):
+            self.__delitem(k)
+
+        def __iter__(self):
+            return itertools.chain(*reversed(self.maps))
+
+        def __len__(self):
+            return len(iter(self))
+
+        # to match Py3, don't del directly
+        def pop(self, k, *args):
+            if self.__contains__(k):
+                off = self.__getitem__(k)
+                self.__delitem(k)
+                return off
+            elif len(args) > 0:
+                return args[0]
+            raise KeyError(k)
+
+        def new_child(self, m=None, **kwargs):
+            m = m or {}
+            m.update(kwargs)
+            return compat_collections_chain_map(m, *self.maps)
+
+        @property
+        def parents(self):
+            return compat_collections_chain_map(*(self.maps[1:]))
+
+
+# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
+compat_re_Pattern = type(re.compile(''))
+# and on the type of a match
+compat_re_Match = type(re.match('a', 'a'))
+
+
 if sys.version_info < (3, 3):
     def compat_b64decode(s, *args, **kwargs):
         if isinstance(s, compat_str):

@@ -3029,8 +3101,10 @@ __all__ = [
     'compat_Struct',
     'compat_b64decode',
     'compat_basestring',
+    'compat_casefold',
     'compat_chr',
     'compat_collections_abc',
+    'compat_collections_chain_map',
     'compat_cookiejar',
     'compat_cookiejar_Cookie',
     'compat_cookies',

@@ -3051,6 +3125,7 @@ __all__ = [
     'compat_input',
     'compat_integer_types',
     'compat_itertools_count',
+    'compat_itertools_zip_longest',
     'compat_kwargs',
     'compat_map',
     'compat_numeric_types',

@@ -3058,6 +3133,8 @@ __all__ = [
     'compat_os_name',
     'compat_parse_qs',
     'compat_print',
+    'compat_re_Match',
+    'compat_re_Pattern',
     'compat_realpath',
     'compat_setenv',
     'compat_shlex_quote',
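The backported `compat_collections_chain_map` mirrors `collections.ChainMap`: lookups fall through the list of maps, while writes and deletes touch only the first. A usage sketch that works against either implementation:

```python
try:
    from youtube_dl.compat import compat_collections_chain_map as ChainMap
except ImportError:
    from collections import ChainMap  # the class being re-implemented

local, defaults = {'a': 1}, {'a': 0, 'b': 2}
scope = ChainMap(local, defaults)

print(scope['a'], scope['b'])          # -> 1 2 (first map wins, then fall through)
scope['c'] = 3                         # writes land in the first map only
print(local)                           # -> {'a': 1, 'c': 3}
print(scope.new_child({'a': 9})['a'])  # -> 9 (child map shadows its parents)
```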
youtube_dl/extractor/adn.py
@@ -31,30 +31,34 @@ from ..utils import (


class ADNIE(InfoExtractor):
    IE_DESC = 'Anime Digital Network'
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'md5': '0319c99885ff5547565cacb4f3f9348d',
    IE_DESC = 'Animation Digital Network'
    _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
        'md5': '1c9ef066ceb302c86f80c2b371615261',
        'info_dict': {
            'id': '7778',
            'id': '9841',
            'ext': 'mp4',
            'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
            'series': 'Blue Exorcist - Kyôto Saga',
            'duration': 1467,
            'release_date': '20170106',
            'title': 'Fruits Basket - Episode 1',
            'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
            'series': 'Fruits Basket',
            'duration': 1437,
            'release_date': '20190405',
            'comment_count': int,
            'average_rating': float,
            'season_number': 2,
            'episode': 'Début des hostilités',
            'season_number': 1,
            'episode': 'À ce soir !',
            'episode_number': 1,
        }
    }
        },
        'skip': 'Only available in region (FR, ...)',
    }, {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'only_matching': True,
    }]

    _NETRC_MACHINE = 'animedigitalnetwork'
    _BASE_URL = 'http://animedigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
    _NETRC_MACHINE = 'animationdigitalnetwork'
    _BASE = 'animationdigitalnetwork.fr'
    _API_BASE_URL = 'https://gw.api.' + _BASE + '/'
    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
    _HEADERS = {}
    _LOGIN_ERR_MESSAGE = 'Unable to log in'
@@ -82,14 +86,14 @@ class ADNIE(InfoExtractor):
        if subtitle_location:
            enc_subtitles = self._download_webpage(
                subtitle_location, video_id, 'Downloading subtitles data',
                fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
                fatal=False, headers={'Origin': 'https://' + self._BASE})
            if not enc_subtitles:
                return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
            bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
            bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')),
            bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ))
        subtitles_json = self._parse_json(
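For reference, the payload layout this hunk decrypts is: the first 24 base64 characters decode to the 16-byte CBC IV, the remainder is the ciphertext, and the AES key is the extractor's `_K` hex fragment concatenated with a hard-coded hex suffix (the suffix is what the commit changes). A standalone sketch of the same steps, using only helpers visible above:

    import binascii

    from youtube_dl.aes import aes_cbc_decrypt
    from youtube_dl.compat import compat_b64decode
    from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes

    def decrypt_adn_subtitles(enc_subtitles, key_fragment):
        # first 24 base64 chars -> 16-byte IV; remainder -> ciphertext
        iv = bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ciphertext = bytes_to_intlist(compat_b64decode(enc_subtitles[24:]))
        # 16-byte AES key = hex fragment from the site + hard-coded hex suffix
        key = bytes_to_intlist(binascii.unhexlify(key_fragment + '7fac1178830cfe0c'))
        # note: PKCS#7 padding is not stripped here
        return intlist_to_bytes(aes_cbc_decrypt(ciphertext, key, iv))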
@@ -138,9 +142,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
        if not username:
            return
        try:
            url = self._API_BASE_URL + 'authentication/login'
            access_token = (self._download_json(
                self._API_BASE_URL + 'authentication/login', None,
                'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
                url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
                data=urlencode_postdata({
                    'password': password,
                    'rememberMe': False,
@@ -153,7 +157,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            message = None
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                resp = self._parse_json(
                    e.cause.read().decode(), None, fatal=False) or {}
                    self._webpage_read_content(e.cause, url, username),
                    username, fatal=False) or {}
                message = resp.get('message') or resp.get('code')
            self.report_warning(message or self._LOGIN_ERR_MESSAGE)

@@ -211,7 +216,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                    # This usually goes away with a different random pkcs1pad, so retry
                    continue

                error = self._parse_json(e.cause.read(), video_id)
                error = self._parse_json(
                    self._webpage_read_content(e.cause, links_url, video_id),
                    video_id, fatal=False) or {}
                message = error.get('message')
                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                    self.raise_geo_restricted(msg=message)
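Both hunks above swap raw `e.cause.read().decode()` for `_webpage_read_content()`, which routes the error body through youtube-dl's normal decoding (charset detection, decompression) and works because `compat_HTTPError` doubles as a response object. A hedged sketch of the pattern as it might appear inside some extractor's `_real_extract` (here `api_url` and `video_id` are assumed to be defined):

    try:
        info = self._download_json(api_url, video_id)
    except ExtractorError as e:
        if isinstance(e.cause, compat_HTTPError):
            # decode the error body safely instead of e.cause.read().decode()
            error = self._parse_json(
                self._webpage_read_content(e.cause, api_url, video_id),
                video_id, fatal=False) or {}
            self.report_warning(error.get('message') or 'API error')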
youtube_dl/extractor/bongacams.py
@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals

import re
@@ -12,13 +13,28 @@ from ..utils import (


class BongaCamsIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://de.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://cn.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://de.bongacams.net/claireashton',
        'info_dict': {
            'id': 'claireashton',
            'ext': 'mp4',
            'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'age_limit': 18,
            'uploader_id': 'ClaireAshton',
            'uploader': 'ClaireAshton',
            'like_count': int,
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
youtube_dl/extractor/ceskatelevize.py
@@ -12,35 +12,21 @@ from ..utils import (
    ExtractorError,
    float_or_none,
    sanitized_Request,
    unescapeHTML,
    update_url_query,
    str_or_none,
    traverse_obj,
    urlencode_postdata,
    USER_AGENTS,
)


class CeskaTelevizeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
        'info_dict': {
            'id': '61924494877246241',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Život v Grónsku',
            'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 3350,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
        'info_dict': {
            'id': '61924494877028507',
            'ext': 'mp4',
            'title': 'Hyde Park Civilizace: Bonus 01 - En',
            'title': 'Bonus 01 - En - Hyde Park Civilizace',
            'description': 'English Subtittles',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 81.3,
@@ -51,31 +37,111 @@ class CeskaTelevizeIE(InfoExtractor):
        },
    }, {
        # live stream
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'url': 'http://www.ceskatelevize.cz/zive/ct1/',
        'info_dict': {
            'id': 402,
            'id': '102',
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'title': r'ČT1 - živé vysílání online',
            'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
            'is_live': True,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Georestricted to Czech Republic',
    }, {
        # another
        'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
        'only_matching': True,
        'info_dict': {
            'id': 402,
            'ext': 'mp4',
            'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
        },
        # 'skip': 'Georestricted to Czech Republic',
    }, {
        'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
        'only_matching': True,
    }, {
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Bogotart - Queer',
            'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494877311053',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Varování 18+)',
                'duration': 11.9,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Bogotart - Queer (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _search_nextjs_data(self, webpage, video_id, **kw):
        return self._parse_json(
            self._search_regex(
                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
                webpage, 'next.js data', **kw),
            video_id, **kw)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, playlist_id)
        parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
        site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
        playlist_title = self._og_search_title(webpage, default=None)
        if site_name and playlist_title:
            playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
        playlist_description = self._og_search_description(webpage, default=None)
        if playlist_description:
            playlist_description = playlist_description.replace('\xa0', ' ')

        webpage = self._download_webpage(url, playlist_id)
        type_ = 'IDEC'
        if re.search(r'(^/porady|/zive)/', parsed_url.path):
            next_data = self._search_nextjs_data(webpage, playlist_id)
            if '/zive/' in parsed_url.path:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
            else:
                idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
                if not idec:
                    idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
                    if idec:
                        type_ = 'bonus'
            if not idec:
                raise ExtractorError('Failed to find IDEC id')
            iframe_hash = self._download_webpage(
                'https://www.ceskatelevize.cz/v-api/iframe-hash/',
                playlist_id, note='Getting IFRAME hash')
            query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
            webpage = self._download_webpage(
                'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
                playlist_id, note='Downloading player', query=query)

        NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
        if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
            raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
            self.raise_geo_restricted(NOT_AVAILABLE_STRING)
        if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
            raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)

        type_ = None
        episode_id = None
@@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor):
        data = {
            'playlist[0][type]': type_,
            'playlist[0][id]': episode_id,
            'requestUrl': compat_urllib_parse_urlparse(url).path,
            'requestUrl': parsed_url.path,
            'requestSource': 'iVysilani',
        }

@@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor):

        for user_agent in (None, USER_AGENTS['Safari']):
            req = sanitized_Request(
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
                'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                data=urlencode_postdata(data))

            req.add_header('Content-type', 'application/x-www-form-urlencoded')
@@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor):
            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
            req.add_header('Referer', url)

            playlist_title = self._og_search_title(webpage, default=None)
            playlist_description = self._og_search_description(webpage, default=None)

            playlist = self._download_json(req, playlist_id, fatal=False)
            if not playlist:
                continue
@@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor):
                    entries[num]['formats'].extend(formats)
                    continue

                item_id = item.get('id') or item['assetId']
                item_id = str_or_none(item.get('id') or item['assetId'])
                title = item['title']

                duration = float_or_none(item.get('duration'))
@@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor):

                if playlist_len == 1:
                    final_title = playlist_title or title
                    if is_live:
                        final_title = self._live_title(final_title)
                else:
                    final_title = '%s (%s)' % (playlist_title, title)

@@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor):
        for e in entries:
            self._sort_formats(e['formats'])

        if len(entries) == 1:
            return entries[0]
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    def _get_subtitles(self, episode_id, subs):
@@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor):
                    yield line

            return '\r\n'.join(_fix_subtitle(subtitles))


class CeskaTelevizePoradyIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        data_url = update_url_query(unescapeHTML(self._search_regex(
            (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
            webpage, 'iframe player url', group='url')), query={
            'autoStart': 'true',
        })

        return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
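The `traverse_obj` paths in the hunk above are worth unpacking: each element of the path tuple is a dict key to descend through, an inner tuple means "try these alternatives", and `get_all=False` returns the first hit instead of a list. A small illustration with a made-up dict:

    from youtube_dl.utils import traverse_obj

    data = {'props': {'pageProps': {'data': {'mediaMeta': {'idec': '217 562 22150/0004'}}}}}

    # ('show', 'mediaMeta') tries the 'show' branch first, then 'mediaMeta';
    # get_all=False yields the first match rather than a list of all matches
    idec = traverse_obj(
        data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'),
        get_all=False)
    assert idec == '217 562 22150/0004'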
youtube_dl/extractor/common.py
@@ -70,6 +70,7 @@ from ..utils import (
    str_or_none,
    str_to_int,
    strip_or_none,
    try_get,
    unescapeHTML,
    unified_strdate,
    unified_timestamp,
@@ -2713,7 +2714,7 @@ class InfoExtractor(object):

    def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
        mobj = re.search(
            r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
            r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
            webpage)
        if mobj:
            try:
@@ -2734,9 +2735,14 @@ class InfoExtractor(object):

    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
        flat_pl = try_get(jwplayer_data, lambda x: x.get('playlist') or True)
        if flat_pl is None:
            # not even a dict
            return []

        # JWPlayer backward compatibility: flattened playlists
        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
        if 'playlist' not in jwplayer_data:
        if flat_pl is True:
            jwplayer_data = {'playlist': [jwplayer_data]}

        entries = []
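The `try_get(..., lambda x: x.get('playlist') or True)` trick above distinguishes three input shapes in one call: a non-dict (the lambda raises, so `flat_pl` is `None`), a dict with no usable `'playlist'` key (`True`), and a proper `{'playlist': [...]}` config (the list itself). The same decision written out longhand (editor's illustration, not the committed code):

    def normalize_jwplayer_config(jwplayer_data):
        if not isinstance(jwplayer_data, dict):
            return None  # not even a dict: nothing to parse
        playlist = jwplayer_data.get('playlist')
        if not playlist:
            # flattened legacy config: the dict itself is the single entry
            return {'playlist': [jwplayer_data]}
        return jwplayer_data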
@@ -2784,6 +2790,13 @@ class InfoExtractor(object):
                'timestamp': int_or_none(video_data.get('pubdate')),
                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                'subtitles': subtitles,
                'alt_title': clean_html(video_data.get('subtitle')),  # attributes used e.g. by Tele5 ...
                'genre': clean_html(video_data.get('genre')),
                'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
                'season_number': int_or_none(video_data.get('season')),
                'episode_number': int_or_none(video_data.get('episode')),
                'release_year': int_or_none(video_data.get('releasedate')),
                'age_limit': int_or_none(video_data.get('age_restriction')),
            }
            # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
            if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
@@ -2792,7 +2805,9 @@ class InfoExtractor(object):
                    'url': formats[0]['url'],
                })
            else:
                self._sort_formats(formats)
                # avoid exception in case of only sttls
                if formats:
                    self._sort_formats(formats)
                entry['formats'] = formats
            entries.append(entry)
        if len(entries) == 1:
@@ -2802,7 +2817,7 @@ class InfoExtractor(object):

    def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                                m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
        urls = []
        urls = set()
        formats = []
        for source in jwplayer_sources_data:
            if not isinstance(source, dict):
@@ -2811,14 +2826,14 @@ class InfoExtractor(object):
                base_url, self._proto_relative_url(source.get('file')))
            if not source_url or source_url in urls:
                continue
            urls.append(source_url)
            urls.add(source_url)
            source_type = source.get('type') or ''
            ext = mimetype2ext(source_type) or determine_ext(source_url)
            if source_type == 'hls' or ext == 'm3u8':
            if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=m3u8_id, fatal=False))
            elif source_type == 'dash' or ext == 'mpd':
            elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
                formats.extend(self._extract_mpd_formats(
                    source_url, video_id, mpd_id=mpd_id, fatal=False))
            elif ext == 'smil':
@@ -2833,20 +2848,23 @@ class InfoExtractor(object):
                    'ext': ext,
                })
            else:
                format_id = str_or_none(source.get('label'))
                height = int_or_none(source.get('height'))
                if height is None:
                if height is None and format_id:
                    # Often no height is provided but there is a label in
                    # format like "1080p", "720p SD", or 1080.
                    height = int_or_none(self._search_regex(
                        r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
                        'height', default=None))
                    height = parse_resolution(format_id).get('height')
                a_format = {
                    'url': source_url,
                    'width': int_or_none(source.get('width')),
                    'height': height,
                    'tbr': int_or_none(source.get('bitrate')),
                    'tbr': int_or_none(source.get('bitrate'), scale=1000),
                    'filesize': int_or_none(source.get('filesize')),
                    'ext': ext,
                }
                if format_id:
                    a_format['format_id'] = format_id

                if source_url.startswith('rtmp'):
                    a_format['ext'] = 'flv'
                    # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
youtube_dl/extractor/extractors.py
@@ -208,10 +208,7 @@ from .ccc import (
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
from .ceskatelevize import (
    CeskaTelevizeIE,
    CeskaTelevizePoradyIE,
)
from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
@@ -377,6 +374,7 @@ from .fc2 import (
    FC2EmbedIE,
)
from .fczenit import FczenitIE
from .fifa import FifaIE
from .filmon import (
    FilmOnIE,
    FilmOnChannelIE,
@@ -912,6 +910,10 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .peekvids import (
    PeekVidsIE,
    PlayVidsIE,
)
from .peertube import PeerTubeIE
from .people import PeopleIE
from .performgroup import PerformGroupIE
@@ -1264,6 +1266,11 @@ from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .thisvid import (
    ThisVidIE,
    ThisVidMemberIE,
    ThisVidPlaylistIE,
)
from .threeqsdn import ThreeQSDNIE
from .tiktok import (
    TikTokIE,
youtube_dl/extractor/fifa.py (new file, 101 lines)
@@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor

from ..utils import (
    int_or_none,
    traverse_obj,
    unified_timestamp,
)

if not callable(getattr(InfoExtractor, '_match_valid_url', None)):

    BaseInfoExtractor = InfoExtractor

    import re

    class InfoExtractor(BaseInfoExtractor):

        @classmethod
        def _match_valid_url(cls, url):
            return re.match(cls._VALID_URL, url)


class FifaIE(InfoExtractor):
    _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
        'info_dict': {
            'id': '7on10qPcnyLajDDU3ntg6y',
            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
            'duration': 8165,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
        'info_dict': {
            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Highlights'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
            'duration': 902,
            'release_timestamp': 1404777600,
            'release_date': '20140708',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
        'info_dict': {
            'id': '3C6gQH9C2DLwzNx7BMRQdp',
            'title': 'Josimar goal against Northern Ireland | Classic Goals',
            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Goal'],
            'duration': 28,
            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        video_id, locale = self._match_valid_url(url).group('id', 'locale')
        webpage = self._download_webpage(url, video_id)

        preconnect_link = self._search_regex(
            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')

        video_details = self._download_json(
            '{preconnect_link}/sections/videoDetails/{video_id}'.format(**locals()), video_id, 'Downloading Video Details', fatal=False)

        preplay_parameters = self._download_json(
            '{preconnect_link}/videoPlayerData/{video_id}'.format(**locals()), video_id, 'Downloading Preplay Parameters')['preplayParameters']

        content_data = self._download_json(
            # 1. query string is expected to be sent as-is
            # 2. `sig` must be appended
            # 3. if absent, the call appears to work but the manifest is bad (404)
            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
            video_id, 'Downloading Content Data')

        # formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
        formats, subtitles = self._extract_m3u8_formats(content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native'), None
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_details['title'],
            'description': video_details.get('description'),
            'duration': int_or_none(video_details.get('duration')),
            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
            'formats': formats,
            'subtitles': subtitles,
        }
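The three numbered comments above the uplynk call are the crux of this extractor: `queryStr` must be passed through untouched and `sig` appended, or the preplay call still appears to succeed but yields a 404 manifest. A sketch of the URL assembly with purely hypothetical parameter values (the real ones come from the `/videoPlayerData/` response):

    # hypothetical values for illustration only
    preplay_parameters = {
        'contentId': 'abcdef0123456789',
        'queryStr': 'cid=abcdef0123456789&v=5',
        'signature': 'deadbeef',
    }
    preplay_url = (
        'https://content.uplynk.com/preplay/{contentId}/multiple.json'
        '?{queryStr}&sig={signature}'.format(**preplay_parameters))
    # -> .../preplay/abcdef0123456789/multiple.json?cid=abcdef0123456789&v=5&sig=deadbeef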
youtube_dl/extractor/generic.py
@@ -28,6 +28,7 @@ from ..utils import (
    mimetype2ext,
    orderedSet,
    parse_duration,
    parse_resolution,
    sanitized_Request,
    smuggle_url,
    unescapeHTML,
@@ -35,6 +36,7 @@ from ..utils import (
    unsmuggle_url,
    UnsupportedError,
    url_or_none,
    urljoin,
    xpath_attr,
    xpath_text,
    xpath_with_ns,
@@ -2227,6 +2229,97 @@ class GenericIE(InfoExtractor):
            # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
            'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
            'only_matching': True,
        }, {
            # KVS Player
            'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
            'info_dict': {
                'id': '105',
                'display_id': 'kelis-4th-of-july',
                'ext': 'mp4',
                'title': 'Kelis - 4th Of July',
                'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
            },
        }, {
            # KVS Player
            'url': 'https://www.kvs-demo.com/embed/105/',
            'info_dict': {
                'id': '105',
                'display_id': 'kelis-4th-of-july',
                'ext': 'mp4',
                'title': 'Kelis - 4th Of July / Embed Player',
                'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # KVS Player (tested also in thisvid.py)
            'url': 'https://youix.com/video/leningrad-zoj/',
            'md5': '94f96ba95706dc3880812b27b7d8a2b8',
            'info_dict': {
                'id': '18485',
                'display_id': 'leningrad-zoj',
                'ext': 'mp4',
                'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
                'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
            },
        }, {
            # KVS Player
            'url': 'https://youix.com/embed/18485',
            'md5': '94f96ba95706dc3880812b27b7d8a2b8',
            'info_dict': {
                'id': '18485',
                'display_id': 'leningrad-zoj',
                'ext': 'mp4',
                'title': 'Ленинград - ЗОЖ',
                'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
            },
        }, {
            # KVS Player
            'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
            'md5': '94166bdb26b4cb1fb9214319a629fc51',
            'info_dict': {
                'id': '21217',
                'display_id': '40-nochey-2016',
                'ext': 'mp4',
                'title': '40 ночей (2016) - BogMedia.org',
                'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
                'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
            },
        }, {
            # KVS Player (for sites that serve kt_player.js via non-https urls)
            'url': 'http://www.camhub.world/embed/389508',
            'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
            'info_dict': {
                'id': '389508',
                'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
                'ext': 'mp4',
                'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
                'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
            },
        }, {
            'url': 'https://mrdeepfakes.com/video/5/selena-gomez-pov-deep-fakes',
            'md5': 'fec4ad5ec150f655e0c74c696a4a2ff4',
            'info_dict': {
                'id': '5',
                'display_id': 'selena-gomez-pov-deep-fakes',
                'ext': 'mp4',
                'title': 'Selena Gomez POV (Deep Fakes) DeepFake Porn - MrDeepFakes',
                'description': 'md5:17d1f84b578c9c26875ac5ef9a932354',
                'height': 720,
                'age_limit': 18,
            },
        }, {
            'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
            'md5': 'e2f0a4c329f7986280b7328e24036d60',
            'info_dict': {
                'id': '284002',
                'display_id': 'just-out-of-the-shower-joi',
                'ext': 'mp4',
                'title': 'Just Out Of The Shower JOI - Shooshtime',
                'height': 720,
                'age_limit': 18,
            },
        },
    ]

@@ -2332,6 +2425,88 @@ class GenericIE(InfoExtractor):
            'title': title,
        }

    def _extract_kvs(self, url, webpage, video_id):

        def getlicensetoken(license):
            modlicense = license.replace('$', '').replace('0', '1')
            center = int(len(modlicense) / 2)
            fronthalf = int(modlicense[:center + 1])
            backhalf = int(modlicense[center:])

            modlicense = compat_str(4 * abs(fronthalf - backhalf))

            def parts():
                for o in range(0, center + 1):
                    for i in range(1, 5):
                        yield compat_str((int(license[o + i]) + int(modlicense[o])) % 10)

            return ''.join(parts())

        def getrealurl(video_url, license_code):
            if not video_url.startswith('function/0/'):
                return video_url  # not obfuscated

            url_path, _, url_query = video_url.partition('?')
            urlparts = url_path.split('/')[2:]
            license = getlicensetoken(license_code)
            newmagic = urlparts[5][:32]

            def spells(x, o):
                l = (o + sum(int(n) for n in license[o:])) % 32
                for i in range(0, len(x)):
                    yield {l: x[o], o: x[l]}.get(i, x[i])

            for o in range(len(newmagic) - 1, -1, -1):
                newmagic = ''.join(spells(newmagic, o))

            urlparts[5] = newmagic + urlparts[5][32:]
            return '/'.join(urlparts) + '?' + url_query

        flashvars = self._search_regex(
            r'(?s)<script\b[^>]*>.*?var\s+flashvars\s*=\s*(\{.+?\});.*?</script>',
            webpage, 'flashvars')
        flashvars = self._parse_json(flashvars, video_id, transform_source=js_to_json)

        # extract the part after the last / as the display_id from the
        # canonical URL.
        display_id = self._search_regex(
            r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
            r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
            webpage, 'display_id', fatal=False
        )
        title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')

        thumbnail = flashvars['preview_url']
        if thumbnail.startswith('//'):
            protocol, _, _ = url.partition('/')
            thumbnail = protocol + thumbnail

        url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys()))
        formats = []
        for key in url_keys:
            if '/get_file/' not in flashvars[key]:
                continue
            format_id = flashvars.get(key + '_text', key)
            formats.append(merge_dicts(
                parse_resolution(format_id) or parse_resolution(flashvars[key]), {
                    'url': urljoin(url, getrealurl(flashvars[key], flashvars['license_code'])),
                    'format_id': format_id,
                    'ext': 'mp4',
                    'http_headers': {'Referer': url},
                }))
            if not formats[-1].get('height'):
                formats[-1]['quality'] = 1

        self._sort_formats(formats)

        return {
            'id': flashvars['video_id'],
            'display_id': display_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }

    def _real_extract(self, url):
        if url.startswith('//'):
            return self.url_result(self.http_scheme() + url)
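To make the de-obfuscation above concrete: KVS marks obfuscated URLs with a `function/0/` prefix; the 32-character "magic" path segment is then unscrambled by repeated position swaps driven by the numeric license token. A usage sketch, assuming the two helpers above were hoisted to module level, with hypothetical flashvars (real values come from the embedded player config):

    # hypothetical player config scraped from a KVS page
    flashvars = {
        'video_id': '18485',
        'license_code': '$495602$',
        'video_url': 'function/0/https://example.com/get_file/2/0123456789abcdef0123456789abcdef01234567/18000/18485/18485.mp4/?br=532',
    }

    video_url = flashvars['video_url']
    if video_url.startswith('function/0/'):
        # strip the marker, unscramble the 32-char hash segment with the
        # license token, then rebuild the URL (see getrealurl() above)
        video_url = getrealurl(video_url, flashvars['license_code'])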
@@ -2540,9 +2715,16 @@ class GenericIE(InfoExtractor):
        # but actually don't.
        AGE_LIMIT_MARKERS = [
            r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
            r'>[^<]*you acknowledge you are at least (\d+) years old',
            r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
        ]
        if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
            age_limit = 18
        for marker in AGE_LIMIT_MARKERS:
            m = re.search(marker, webpage)
            if not m:
                continue
            age_limit = max(
                age_limit or 0,
                int_or_none(m.groups() and m.group(1), default=18))

        # video uploader is domain name
        video_uploader = self._search_regex(
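A note on `int_or_none(m.groups() and m.group(1), default=18)` in the hunk above: for markers with no capture group, `m.groups()` is an empty tuple, the `and` short-circuits, and `int_or_none` falls back to the default of 18; for the `(\d+)` marker the captured age wins. The same rule spelled out (editor's sketch with a simplified marker):

    import re

    from youtube_dl.utils import int_or_none

    def marker_age_limit(marker, webpage):
        m = re.search(marker, webpage)
        if not m:
            return None
        if m.groups():                  # pattern captured an explicit age
            return int_or_none(m.group(1), default=18)
        return 18                       # marker matched, no age captured

    assert marker_age_limit(r'you acknowledge you are at least (\d+) years old',
                            'you acknowledge you are at least 21 years old') == 21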
@@ -3389,6 +3571,20 @@ class GenericIE(InfoExtractor):
            info_dict['formats'] = formats
            return info_dict

        # Look for generic KVS player (before ld+json for tests)
        found = self._search_regex(
            (r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
             # kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
             r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
             ), webpage, 'KVS player', group='ver', default=False)
        if found:
            self.report_extraction('%s: KVS Player' % (video_id, ))
            if found.split('.')[0] not in ('4', '5', '6'):
                self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
            return merge_dicts(
                self._extract_kvs(url, webpage, video_id),
                info_dict)

        # Looking for http://schema.org/VideoObject
        json_ld = self._search_json_ld(
            webpage, video_id, default={}, expected_type='VideoObject')
youtube_dl/extractor/infoq.py
@@ -1,6 +1,9 @@
# coding: utf-8

from __future__ import unicode_literals

from ..utils import (
    ExtractorError,
)

from ..compat import (
    compat_b64decode,
@@ -90,7 +93,11 @@ class InfoQIE(BokeCCBaseIE):
    }]

    def _extract_http_audio(self, webpage, video_id):
        fields = self._form_hidden_inputs('mp3Form', webpage)
        try:
            fields = self._form_hidden_inputs('mp3Form', webpage)
        except ExtractorError:
            fields = {}

        http_audio_url = fields.get('filename')
        if not http_audio_url:
            return []
youtube_dl/extractor/manyvids.py
@@ -1,11 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    determine_ext,
    extract_attributes,
    int_or_none,
    str_to_int,
    url_or_none,
    urlencode_postdata,
)

@@ -20,17 +25,20 @@ class ManyVidsIE(InfoExtractor):
            'id': '133957',
            'ext': 'mp4',
            'title': 'everthing about me (Preview)',
            'uploader': 'ellyxxix',
            'view_count': int,
            'like_count': int,
        },
    }, {
        # full video
        'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
        'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
        'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
        'info_dict': {
            'id': '935718',
            'ext': 'mp4',
            'title': 'MY FACE REVEAL',
            'description': 'md5:ec5901d41808b3746fed90face161612',
            'uploader': 'Sarah Calanthe',
            'view_count': int,
            'like_count': int,
        },
@@ -39,17 +47,50 @@ class ManyVidsIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
        try:
            webpage = self._download_webpage(real_url, video_id)
        except Exception:
            # probably useless fallback
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(
            r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
            webpage, 'video URL', group='url')
        info = self._search_regex(
            r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
            webpage, 'meta details', default='')
        info = extract_attributes(info)

        title = self._html_search_regex(
            (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
             r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'twitter:title', webpage, 'title', fatal=True)
        player = self._search_regex(
            r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
            webpage, 'player details', default='')
        player = extract_attributes(player)

        video_urls_and_ids = (
            (info.get('data-meta-video'), 'video'),
            (player.get('data-video-transcoded'), 'transcoded'),
            (player.get('data-video-filepath'), 'filepath'),
            (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
        )

        def txt_or_none(s, default=None):
            return (s.strip() or default) if isinstance(s, compat_str) else default

        uploader = txt_or_none(info.get('data-meta-author'))

        def mung_title(s):
            if uploader:
                s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
            return txt_or_none(s)

        title = (
            mung_title(info.get('data-meta-title'))
            or self._html_search_regex(
                (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
                 r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
                webpage, 'title', default=None)
            or self._html_search_meta(
                'twitter:title', webpage, 'title', fatal=True))

        title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title

        if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
            title += ' (Preview)'
@@ -62,7 +103,8 @@ class ManyVidsIE(InfoExtractor):
            # Sets some cookies
            self._download_webpage(
                'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
                video_id, fatal=False, data=urlencode_postdata({
                video_id, note='Setting format cookies', fatal=False,
                data=urlencode_postdata({
                    'mvtoken': mv_token,
                    'vid': video_id,
                }), headers={
@@ -70,23 +112,56 @@ class ManyVidsIE(InfoExtractor):
                    'X-Requested-With': 'XMLHttpRequest'
                })

        if determine_ext(video_url) == 'm3u8':
            formats = self._extract_m3u8_formats(
                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        else:
            formats = [{'url': video_url}]
        formats = []
        for v_url, fmt in video_urls_and_ids:
            v_url = url_or_none(v_url)
            if not v_url:
                continue
            if determine_ext(v_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    v_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls'))
            else:
                formats.append({
                    'url': v_url,
                    'format_id': fmt,
                })

        like_count = int_or_none(self._search_regex(
            r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
        view_count = str_to_int(self._html_search_regex(
            r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
            'view count', default=None))
        self._remove_duplicate_formats(formats)

        for f in formats:
            if f.get('height') is None:
                f['height'] = int_or_none(
                    self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
            if '/preview/' in f['url']:
                f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
                f['preference'] = -10
            if 'transcoded' in f['format_id']:
                f['preference'] = f.get('preference', -1) - 1

        self._sort_formats(formats)

        def get_likes():
            likes = self._search_regex(
                r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
                webpage, 'likes', default='')
            likes = extract_attributes(likes)
            return int_or_none(likes.get('data-likes'))

        def get_views():
            return str_to_int(self._html_search_regex(
                r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
                webpage, 'view count', default=None))

        return {
            'id': video_id,
            'title': title,
            'view_count': view_count,
            'like_count': like_count,
            'formats': formats,
            'description': txt_or_none(info.get('data-meta-description')),
            'uploader': txt_or_none(info.get('data-meta-author')),
            'thumbnail': (
                url_or_none(info.get('data-meta-image'))
                or url_or_none(player.get('data-video-screenshot'))),
            'view_count': get_views(),
            'like_count': get_likes(),
        }
youtube_dl/extractor/minds.py
@@ -78,7 +78,7 @@ class MindsIE(MindsBaseIE):
            else:
                return self.url_result(entity['perma_url'])
        else:
            assert(entity['subtype'] == 'video')
            assert (entity['subtype'] == 'video')
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
youtube_dl/extractor/motherless.py
@@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals

import datetime
@@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
            'title': 'a/ Hot Teens',
            'categories': list,
            'upload_date': '20210104',
            'uploader_id': 'yonbiw',
            'uploader_id': 'anonymous',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
@@ -125,9 +126,10 @@ class MotherlessIE(InfoExtractor):
            kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
            upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')

        comment_count = webpage.count('class="media-comment-contents"')
        comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
        uploader_id = self._html_search_regex(
            r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
            (r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
             r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
            webpage, 'uploader_id')

        categories = self._html_search_meta('keywords', webpage, default=None)
@@ -169,7 +171,18 @@ class MotherlessGroupIE(InfoExtractor):
            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
                           'any kind!'
        },
        'playlist_mincount': 9,
        'playlist_mincount': 0,
        'expected_warnings': [
            'This group has no videos.',
        ]
    }, {
        'url': 'https://motherless.com/g/beautiful_cock',
        'info_dict': {
            'id': 'beautiful_cock',
            'title': 'Beautiful Cock',
            'description': 'Group for lovely cocks yours, mine, a friends anything human',
        },
        'playlist_mincount': 2500,
    }]

    @classmethod
@@ -208,16 +221,23 @@ class MotherlessGroupIE(InfoExtractor):
            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
        description = self._html_search_meta(
            'description', webpage, fatal=False)
        page_count = self._int(self._search_regex(
            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
            webpage, 'page_count'), 'page_count')
        page_count = str_to_int(self._search_regex(
            r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
            webpage, 'page_count', default=0))
        if not page_count:
            message = self._search_regex(
                r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
                webpage, 'error_msg', default=None) or 'This group has no videos.'
            self.report_warning(message, group_id)
            page_count = 1
        PAGE_SIZE = 80

        def _get_page(idx):
            webpage = self._download_webpage(
                page_url, group_id, query={'page': idx + 1},
                note='Downloading page %d/%d' % (idx + 1, page_count)
            )
            if idx > 0:
                webpage = self._download_webpage(
                    page_url, group_id, query={'page': idx + 1},
                    note='Downloading page %d/%d' % (idx + 1, page_count)
                )
            for entry in self._extract_entries(webpage, url):
                yield entry
youtube_dl/extractor/neteasemusic.py
@@ -1,20 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals

from hashlib import md5
from base64 import b64encode
from binascii import hexlify
from datetime import datetime
from hashlib import md5
from random import randint
import json
import re
import time

from .common import InfoExtractor
from ..aes import aes_ecb_encrypt, pkcs7_padding
from ..compat import (
    compat_urllib_parse_urlencode,
    compat_str,
    compat_itertools_count,
)
from ..utils import (
    sanitized_Request,
    ExtractorError,
    bytes_to_intlist,
    error_to_compat_str,
    float_or_none,
    int_or_none,
    intlist_to_bytes,
    sanitized_Request,
    std_headers,
    try_get,
)


@@ -35,32 +47,106 @@ class NetEaseMusicBaseIE(InfoExtractor):
        result = b64encode(m.digest()).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    @classmethod
    def make_player_api_request_data_and_headers(cls, song_id, bitrate):
        KEY = b'e82ckenh8dichen8'
        URL = '/api/song/enhance/player/url'
        now = int(time.time() * 1000)
        rand = randint(0, 1000)
        cookie = {
            'osver': None,
            'deviceId': None,
            'appver': '8.0.0',
            'versioncode': '140',
            'mobilename': None,
            'buildver': '1623435496',
            'resolution': '1920x1080',
            '__csrf': '',
            'os': 'pc',
            'channel': None,
            'requestId': '{0}_{1:04}'.format(now, rand),
        }
        request_text = json.dumps(
            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
            separators=(',', ':'))
        message = 'nobody{0}use{1}md5forencrypt'.format(
            URL, request_text).encode('latin1')
        msg_digest = md5(message).hexdigest()

        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
            URL, request_text, msg_digest)
        data = pkcs7_padding(bytes_to_intlist(data))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
        encrypted_params = hexlify(encrypted).decode('ascii').upper()

        cookie = '; '.join(
            ['{0}={1}'.format(k, v if v is not None else 'undefined')
             for [k, v] in cookie.items()])

        headers = {
            'User-Agent': std_headers['User-Agent'],
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': 'https://music.163.com',
            'Cookie': cookie,
        }
        return ('params={0}'.format(encrypted_params), headers)

    def _call_player_api(self, song_id, bitrate):
        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
        try:
            msg = 'empty result'
            result = self._download_json(
                url, song_id, data=data.encode('ascii'), headers=headers)
            if result:
                return result
        except ExtractorError as e:
            if type(e.cause) in (ValueError, TypeError):
                # JSON load failure
                raise
        except Exception as e:
            msg = error_to_compat_str(e)
        self.report_warning('%s API call (%s) failed: %s' % (
            song_id, bitrate, msg))
        return {}

    def extract_formats(self, info):
        err = 0
        formats = []
        song_id = info['id']
        for song_format in self._FORMATS:
            details = info.get(song_format)
            if not details:
                continue
            song_file_path = '/%s/%s.%s' % (
                self._encrypt(details['dfsId']), details['dfsId'], details['extension'])

            # 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
            # from NetEase's CDN provider that can be used if m5.music.126.net does not
            # work, especially for users outside of Mainland China
            # via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
            for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
                         'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
                song_url = host + song_file_path

            bitrate = int_or_none(details.get('bitrate')) or 999000
            data = self._call_player_api(song_id, bitrate)
            for song in try_get(data, lambda x: x['data'], list) or []:
                song_url = try_get(song, lambda x: x['url'])
                if not song_url:
                    continue
                if self._is_valid_url(song_url, info['id'], 'song'):
                    formats.append({
                        'url': song_url,
                        'ext': details.get('extension'),
                        'abr': float_or_none(details.get('bitrate'), scale=1000),
                        'abr': float_or_none(song.get('br'), scale=1000),
                        'format_id': song_format,
                        'filesize': details.get('size'),
                        'asr': details.get('sr')
                        'filesize': int_or_none(song.get('size')),
                        'asr': int_or_none(details.get('sr')),
                    })
                    break
                elif err == 0:
                    err = try_get(song, lambda x: x['code'], int)

        if not formats:
            msg = 'No media links found'
            if err != 0 and (err < 200 or err >= 400):
                raise ExtractorError(
                    '%s (site code %d)' % (msg, err, ), expected=True)
            else:
                self.raise_geo_restricted(
                    msg + ': probably this video is not available from your location due to geo restriction.',
                    countries=['CN'])

        return formats

    @classmethod
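The `make_player_api_request_data_and_headers` hunk above implements NetEase's "eapi" request signing: the request JSON is wrapped as `{path}-36cd479b6b5-{json}-36cd479b6b5-{md5}`, where the digest is an MD5 over the sentence `nobody{path}use{json}md5forencrypt`; the result is PKCS#7-padded, AES-ECB-encrypted with the fixed client key `e82ckenh8dichen8`, and upper-case-hexed into a `params=` form body. A condensed standalone sketch of the same transformation, using the helpers this commit imports:

    from binascii import hexlify
    from hashlib import md5

    from youtube_dl.aes import aes_ecb_encrypt, pkcs7_padding
    from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes

    def eapi_params(api_path, request_text, key=b'e82ckenh8dichen8'):
        # digest over a fixed sentence that embeds path and payload
        digest = md5(('nobody%suse%smd5forencrypt'
                      % (api_path, request_text)).encode('latin1')).hexdigest()
        plain = '%s-36cd479b6b5-%s-36cd479b6b5-%s' % (api_path, request_text, digest)
        # PKCS#7-pad, AES-ECB with the fixed client key, upper-case hex
        padded = pkcs7_padding(bytes_to_intlist(plain.encode('utf-8')))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(padded, bytes_to_intlist(key)))
        return 'params=' + hexlify(encrypted).decode('ascii').upper()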
@@ -76,33 +162,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
class NetEaseMusicIE(NetEaseMusicBaseIE):
    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
    _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
        'md5': '3e909614ce09b1ccef4a3eb205441190',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood (feat. Kendrick Lamar)',
            'title': 'Bad Blood',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150517',
            'timestamp': 1431878400,
            'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
            'upload_date': '20150516',
            'timestamp': 1431792000,
            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'No lyrics translation.',
        'url': 'http://music.163.com/#/song?id=29822014',
        'info_dict': {
            'id': '29822014',
            'ext': 'mp3',
            'title': '听见下雨的声音',
            'creator': '周杰伦',
            'upload_date': '20141225',
            'timestamp': 1419523200,
            'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
@@ -112,9 +184,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
            'timestamp': 1202745600,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
@@ -128,7 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
        'md5': '95826c73ea50b1c288b22180ec9e754d',
        'info_dict': {
            'id': '95670',
            'ext': 'mp3',
            'title': '国际歌',
            'creator': '马备',
            'upload_date': '19911130',
            'timestamp': 691516800,
            'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
        },
    }]

    def _process_lyrics(self, lyrics_info):
youtube_dl/extractor/nrk.py
@@ -60,8 +60,7 @@ class NRKBaseIE(InfoExtractor):
        return self._download_json(
            urljoin('https://psapi.nrk.no/', path),
            video_id, note or 'Downloading %s JSON' % item,
            fatal=fatal, query=query,
            headers={'Accept-Encoding': 'gzip, deflate, br'})
            fatal=fatal, query=query)


class NRKIE(NRKBaseIE):
youtube_dl/extractor/peekvids.py (new file, 193 lines)
@@ -0,0 +1,193 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    get_element_by_class,
    int_or_none,
    merge_dicts,
    url_or_none,
)


class PeekVidsIE(InfoExtractor):
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?peekvids\.com/
        (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
        (?P<id>[^/?&#]*)
    '''
    _TESTS = [{
        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
        'info_dict': {
            'id': '1262717',
            'display_id': 'BSyLMbN0YCd',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1642579329,
            'upload_date': '20220119',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }]
    _DOMAIN = 'www.peekvids.com'

    def _get_detail(self, html):
        return get_element_by_class('detail-video-block', html)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, expected_status=429)
        if '>Rate Limit Exceeded' in webpage:
            raise ExtractorError(
                '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'),
                expected=True)

        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

        display_id = video_id
        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
        srcs = self._download_json(
            'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id,
            note='Downloading list of source files')
        formats = [{
            'url': f_url,
            'format_id': f_id,
            'height': int_or_none(f_id),
        } for f_url, f_id in (
            (url_or_none(f_v), f_match.group(1))
            for f_v, f_match in (
                (v, re.match(r'^data-src(\d{3,})$', k))
                for k, v in srcs.items() if v) if f_match)
            if f_url
        ]
        if not formats:
            formats = [{'url': url} for url in srcs.values()]
        self._sort_formats(formats)

        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        info.pop('url', None)
        # may not have found the thumbnail if it was in a list in the ld+json
        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
        detail = self._get_detail(webpage) or ''
        info['description'] = self._html_search_regex(
            r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description', '')), ),
            detail, 'description', default=None) or None
        info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)

        def cat_tags(name, html):
            l = self._html_search_regex(
                r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ),
                html, name, default='')
            return [x for x in re.split(r'\s+', l) if x]

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'age_limit': 18,
            'formats': formats,
            'categories': cat_tags('Categories', detail),
            'tags': cat_tags('Tags', detail),
            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
        }, info)
class PlayVidsIE(PeekVidsIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
|
||||
'md5': '2f12e50213dd65f142175da633c4564c',
|
||||
'info_dict': {
|
||||
'id': '1978030',
|
||||
'display_id': 'U3pBrYhsjXM',
|
||||
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
|
||||
'timestamp': 1640435839,
|
||||
'upload_date': '20211225',
|
||||
'duration': 416,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'uploader': 'SEXYhub.com',
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
|
||||
'md5': 'e783986e596cafbf46411a174ab42ba6',
|
||||
'info_dict': {
|
||||
'id': '762385',
|
||||
'display_id': 'bKmGLe3IwjZ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
|
||||
'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
|
||||
'timestamp': 1516958544,
|
||||
'upload_date': '20180126',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 480,
|
||||
'uploader': 'Brazzers',
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.playvids.com/v/47iUho33toY',
|
||||
'md5': 'b056b5049d34b648c1e86497cf4febce',
|
||||
'info_dict': {
|
||||
'id': '700621',
|
||||
'display_id': '47iUho33toY',
|
||||
'ext': 'mp4',
|
||||
'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
|
||||
'description': None,
|
||||
'timestamp': 1507052209,
|
||||
'upload_date': '20171003',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 332,
|
||||
'uploader': 'Cacerenele',
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
|
||||
'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
|
||||
'info_dict': {
|
||||
'id': '1523518',
|
||||
'display_id': 'z3_7iwWCmqt',
|
||||
'ext': 'mp4',
|
||||
'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
|
||||
'description': None,
|
||||
'timestamp': 1607470323,
|
||||
'upload_date': '20201208',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 593,
|
||||
'uploader': 'yorours',
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
'tags': list,
|
||||
},
|
||||
}]
|
||||
_DOMAIN = 'www.playvids.com'
|
||||
|
||||
def _get_detail(self, html):
|
||||
return get_element_by_class('detail-block', html)
|
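A note on the formats logic in PeekVidsIE._real_extract above: the /v-alt/ endpoint returns a flat JSON object whose useful keys look like data-src480, data-src720, ..., and the nested generator expressions keep only non-empty URLs under keys matching data-src(\d{3,}). A minimal self-contained sketch of that filtering (not part of the diff; the sample mapping and URLs are invented):

import re

srcs = {
    'data-src480': 'https://example.invalid/v480.mp4',
    'data-src720': 'https://example.invalid/v720.mp4',
    'poster': 'https://example.invalid/poster.jpg',  # no data-srcNNN key: ignored
}

formats = [{
    'url': f_url,
    'format_id': f_id,
    'height': int(f_id),
} for f_url, f_id in (
    (v, m.group(1))
    for v, m in ((v, re.match(r'^data-src(\d{3,})$', k))
                 for k, v in srcs.items() if v)
    if m)
]

print(sorted(f['height'] for f in formats))  # [480, 720]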
@@ -34,7 +34,9 @@ class TelegraafIE(InfoExtractor):
         article_id = self._match_id(url)

         video_id = self._download_json(
-            'https://www.telegraaf.nl/graphql', article_id, query={
+            'https://app.telegraaf.nl/graphql', article_id,
+            headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
+            query={
                 'query': '''{
                     article(uid: %s) {
                         videos {
|
218
youtube_dl/extractor/thisvid.py
Normal file
218
youtube_dl/extractor/thisvid.py
Normal file
@@ -0,0 +1,218 @@
# coding: utf-8
from __future__ import unicode_literals

import re
import itertools

from .common import InfoExtractor
from ..compat import (
    compat_urlparse,
)
from ..utils import (
    clean_html,
    get_element_by_class,
    int_or_none,
    merge_dicts,
    url_or_none,
    urljoin,
)


class ThisVidIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'description': 'md5:372353bb995883d1b65fddf507489acd',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/3533241/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        main_id, type_ = re.match(self._VALID_URL, url).group('id', 'type')
        webpage = self._download_webpage(url, main_id)

        title = self._html_search_regex(
            r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
            webpage, 'title')

        if type_ == 'embed':
            # look for more metadata
            video_alt_url = url_or_none(self._search_regex(
                r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, ),
                webpage, 'video_alt_url', default=None))
            if video_alt_url and video_alt_url != url:
                webpage = self._download_webpage(
                    video_alt_url, main_id,
                    note='Redirecting embed to main page', fatal=False) or webpage

        video_holder = get_element_by_class('video-holder', webpage) or ''
        if '>This video is a private video' in video_holder:
            self.raise_login_required(
                (clean_html(video_holder) or 'Private video').split('\n', 1)[0])

        uploader = self._html_search_regex(
            r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
            webpage, 'uploader', default='')
        uploader = re.split(r'''/["'][^>]*>\s*''', uploader)
        if len(uploader) == 2:
            # id must be non-empty, uploader could be ''
            uploader_id, uploader = uploader
            uploader = uploader or None
        else:
            uploader_id = uploader = None

        return merge_dicts({
            '_type': 'url_transparent',
            'title': title,
            'age_limit': 18,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }, self.url_result(url, ie='Generic'))


class ThisVidMemberIE(InfoExtractor):
    _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://thisvid.com/members/2140501/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Profile',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://thisvid.com/members/2140501/favourite_videos/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Favourite Videos',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://thisvid.com/members/636468/public_videos/',
        'info_dict': {
            'id': '636468',
            'title': 'Happymouth\'s Public Videos',
        },
        'playlist_mincount': 196,
    },
    ]

    def _urls(self, html):
        for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, ), html):
            yield m.group('url')

    def _real_extract(self, url):
        pl_id = self._match_id(url)
        webpage = self._download_webpage(url, pl_id)

        title = re.split(
            r'(?i)\s*\|\s*ThisVid\.com\s*$',
            self._og_search_title(webpage, default=None) or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None

        def entries(page_url, html=None):
            for page in itertools.count(1):
                if not html:
                    html = self._download_webpage(
                        page_url, pl_id, note='Downloading page %d' % (page, ),
                        fatal=False) or ''
                for u in self._urls(html):
                    yield u
                next_page = get_element_by_class('pagination-next', html) or ''
                if next_page:
                    # member list page
                    next_page = urljoin(url, self._search_regex(
                        r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
                        next_page, 'next page link', group='url', default=None))
                # in case a member page should have pagination-next with empty link, not just `else:`
                if next_page is None:
                    # playlist page
                    parsed_url = compat_urlparse.urlparse(page_url)
                    base_path, num = parsed_url.path.rsplit('/', 1)
                    num = int_or_none(num)
                    if num is None:
                        base_path, num = parsed_url.path.rstrip('/'), 1
                    parsed_url = parsed_url._replace(path=base_path + ('/%d' % (num + 1, )))
                    next_page = compat_urlparse.urlunparse(parsed_url)
                    if page_url == next_page:
                        next_page = None
                if not next_page:
                    break
                page_url, html = next_page, None

        return self.playlist_from_matches(
            entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid')


class ThisVidPlaylistIE(ThisVidMemberIE):
    _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '6615',
            'title': 'Underwear Stuff',
        },
        'playlist_mincount': 200,
    }, {
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '1072387',
            'ext': 'mp4',
            'title': 'Big Italian Booty 28',
            'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
            'uploader_id': '367912',
            'uploader': 'Jcmusclefun',
            'age_limit': 18,
        },
        'params': {
            'noplaylist': True,
        },
    }]

    def _get_video_url(self, pl_url):
        video_id = re.match(self._VALID_URL, pl_url).group('video_id')
        return urljoin(pl_url, '/videos/%s/' % (video_id, ))

    def _urls(self, html):
        for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (self._VALID_URL, ), html):
            yield self._get_video_url(m.group('url'))

    def _real_extract(self, url):
        pl_id = self._match_id(url)

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just the featured video because of --no-playlist')
            return self.url_result(self._get_video_url(url), 'ThisVid')

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, ))
        result = super(ThisVidPlaylistIE, self)._real_extract(url)

        # rework title returned as `the title - the title`
        title = result['title']
        t_len = len(title)
        if t_len > 5 and t_len % 2 != 0:
            t_len = t_len // 2
            if title[t_len] == '-':
                title = [t.strip() for t in (title[:t_len], title[t_len + 1:])]
                if title[0] and title[0] == title[1]:
                    result['title'] = title[0]
        return result
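The entries() generator in ThisVidMemberIE above is a common extractor pattern: fetch a page, yield its items, then follow the "next" link (or synthesize /2, /3, ... paths) until the link is missing or repeats. A stripped-down sketch of the same loop (not part of the diff; the page table and fetch() stub are invented):

PAGES = {
    '/videos/1': (['a', 'b'], '/videos/2'),
    '/videos/2': (['c'], None),
}

def fetch(path):
    # stand-in for _download_webpage() plus link extraction
    return PAGES.get(path, ([], None))

def entries(page_url):
    while True:
        items, next_page = fetch(page_url)
        for item in items:
            yield item
        # stop when there is no next page or it would repeat
        if not next_page or next_page == page_url:
            break
        page_url = next_page

print(list(entries('/videos/1')))  # ['a', 'b', 'c']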
@@ -5,7 +5,7 @@ from .common import InfoExtractor


 class UKTVPlayIE(InfoExtractor):
-    _VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
+    _VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
         'info_dict': {
@@ -663,7 +663,7 @@ class VimeoIE(VimeoBaseInfoExtractor):

         if '//player.vimeo.com/video/' in url:
             config = self._parse_json(self._search_regex(
-                r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+                r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
             if config.get('view') == 4:
                 config = self._verify_player_video_password(
                     redirect_url, video_id, headers)
@@ -1500,7 +1500,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return lambda s: jsi.extract_function_from_code(*func_code)([s])

     def _n_descramble(self, n_param, player_url, video_id):
-        """Compute the response to YT's "n" parameter challenge
+        """Compute the response to YT's "n" parameter challenge,
+        or None

         Args:
             n_param -- challenge string that is the value of the
@@ -1518,7 +1519,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if player_id not in self._player_cache:
             self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
         func = self._player_cache[player_id]
-        self._player_cache[sig_id] = func(n_param)
+        ret = func(n_param)
+        if ret.startswith('enhanced_except_'):
+            raise ExtractorError('Unhandled exception in decode')
+        self._player_cache[sig_id] = ret
         if self._downloader.params.get('verbose', False):
             self._downloader.to_screen('[debug] [%s] %s' % (self.IE_NAME, 'Decrypted nsig {0} => {1}'.format(n_param, self._player_cache[sig_id])))
         return self._player_cache[sig_id]
@@ -1539,10 +1543,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 continue
             n_param = n_param[-1]
             n_response = self._n_descramble(n_param, player_url, video_id)
-            if n_response:
-                qs['n'] = [n_response]
-                fmt['url'] = compat_urlparse.urlunparse(
-                    parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+            if n_response is None:
+                # give up if descrambling failed
+                break
+            qs['n'] = [n_response]
+            fmt['url'] = compat_urlparse.urlunparse(
+                parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

     def _mark_watched(self, video_id, player_response):
         playback_url = url_or_none(try_get(
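The middle hunk changes the caching policy: a descrambled "n" value is only stored once it is known to be good, and the JS-side failure marker ('enhanced_except_...') now raises instead of being cached and silently reused. A self-contained sketch of that policy (not part of the diff; DescrambleError and the stub function are invented):

class DescrambleError(Exception):
    pass

cache = {}

def descramble_n(n_param, func):
    key = ('nsig value', n_param)
    if key in cache:
        return cache[key]
    ret = func(n_param)
    if ret.startswith('enhanced_except_'):
        # do not cache the failure marker; surface it instead
        raise DescrambleError('Unhandled exception in decode')
    cache[key] = ret
    return ret

print(descramble_n('abc', lambda s: s[::-1]))  # cba (and now cached)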
@@ -8,13 +8,14 @@ from ..compat import compat_str
 from ..utils import (
     determine_ext,
     ExtractorError,
+    extract_attributes,
     float_or_none,
     int_or_none,
+    merge_dicts,
     NO_DEFAULT,
-    orderedSet,
     parse_codecs,
     qualities,
     str_or_none,
     try_get,
     unified_timestamp,
     update_url_query,
@@ -57,28 +58,39 @@ class ZDFBaseIE(InfoExtractor):
         format_urls.add(format_url)
         mime_type = meta.get('mimeType')
         ext = determine_ext(format_url)
+
+        join_nonempty = lambda s, l: s.join(filter(None, l))
+        meta_map = lambda t: map(lambda x: str_or_none(meta.get(x)), t)
+
         if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
+            new_formats = self._extract_m3u8_formats(
                 format_url, video_id, 'mp4', m3u8_id='hls',
-                entry_protocol='m3u8_native', fatal=False))
+                entry_protocol='m3u8_native', fatal=False)
         elif mime_type == 'application/f4m+xml' or ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+            new_formats = self._extract_f4m_formats(
+                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
         else:
             f = parse_codecs(meta.get('mimeCodec'))
+            if not f:
+                data = meta.get('type', '').split('_')
+                if try_get(data, lambda x: x[2]) == ext:
+                    f = dict(zip(('vcodec', 'acodec'), data[1]))
+
             format_id = ['http']
-            for p in (meta.get('type'), meta.get('quality')):
-                if p and isinstance(p, compat_str):
-                    format_id.append(p)
+            format_id.extend(join_nonempty('-', meta_map(('type', 'quality'))))
             f.update({
                 'url': format_url,
                 'format_id': '-'.join(format_id),
-                'format_note': meta.get('quality'),
-                'language': meta.get('language'),
-                'quality': qualities(self._QUALITIES)(meta.get('quality')),
-                'preference': -10,
                 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
             })
-            formats.append(f)
+            new_formats = [f]
+
+        formats.extend(merge_dicts(f, {
+            'format_note': join_nonempty(',', meta_map(('quality', 'class'))),
+            'language': meta.get('language'),
+            'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
+            'quality': qualities(self._QUALITIES)(meta.get('quality')),
+        }) for f in new_formats)

     def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
         ptmd = self._call_api(
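The net effect of the _extract_format rewrite above: each branch now only builds new_formats, and the shared metadata (format_note, language, language_preference, quality) is layered on afterwards with merge_dicts, so a value the format already carries wins. A rough sketch with a simplified merge_dicts (not part of the diff; the real utils.merge_dicts additionally lets non-empty strings replace empty ones, and the URLs are invented):

def merge_dicts(*dicts):
    # simplified: the first non-None value seen for a key wins
    merged = {}
    for d in dicts:
        for k, v in d.items():
            if v is not None and k not in merged:
                merged[k] = v
    return merged

new_formats = [
    {'url': 'https://example.invalid/hi.m3u8', 'format_note': 'keep-me'},
    {'url': 'https://example.invalid/lo.m3u8'},
]
shared = {'format_note': 'main,auto', 'language': 'de', 'language_preference': 10}

formats = [merge_dicts(f, shared) for f in new_formats]
print(formats[0]['format_note'])  # keep-me (already set, not overwritten)
print(formats[1]['format_note'])  # main,auto (filled from the shared dict)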
@@ -107,6 +119,7 @@ class ZDFBaseIE(InfoExtractor):
                 'type': f.get('type'),
                 'mimeType': f.get('mimeType'),
                 'quality': quality.get('quality'),
+                'class': track.get('class'),
                 'language': track.get('language'),
             })
         self._sort_formats(formats)
@@ -171,6 +184,20 @@ class ZDFIE(ZDFBaseIE):
             'duration': 2615,
             'timestamp': 1465021200,
             'upload_date': '20160604',
+            'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
         },
+    }, {
+        'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
+        'md5': '1b93bdec7d02fc0b703c5e7687461628',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': 'video_funk_1770473',
+            'duration': 1278,
+            'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
+            'title': 'Alles ist verzaubert',
+            'timestamp': 1635520560,
+            'upload_date': '20211029',
+            'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
+        },
     }, {
         # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
@@ -204,6 +231,19 @@ class ZDFIE(ZDFBaseIE):
             'timestamp': 1641355200,
             'upload_date': '20220105',
         },
+        'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
+    }, {
+        'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+        'info_dict': {
+            'id': '191205_1800_sendung_sok8',
+            'ext': 'mp4',
+            'title': 'Das Geld anderer Leute',
+            'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
+            'duration': 2581.0,
+            'timestamp': 1654790700,
+            'upload_date': '20220609',
+            'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
+        },
     }]

     def _extract_entry(self, url, player, content, video_id):
@@ -265,15 +305,16 @@ class ZDFIE(ZDFBaseIE):
             'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
             video_id)

-        document = video['document']
-
-        title = document['titel']
-        content_id = document['basename']
-
         formats = []
-        format_urls = set()
-        for f in document['formitaeten']:
-            self._extract_format(content_id, formats, format_urls, f)
+        formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
+        document = formitaeten and video['document']
+        if formitaeten:
+            title = document['titel']
+            content_id = document['basename']
+
+            format_urls = set()
+            for f in formitaeten or []:
+                self._extract_format(content_id, formats, format_urls, f)
         self._sort_formats(formats)

         thumbnails = []
@@ -320,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE):
         'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
         'info_dict': {
             'id': 'das-aktuelle-sportstudio',
-            'title': 'das aktuelle sportstudio | ZDF',
+            'title': 'das aktuelle sportstudio',
         },
-        'playlist_mincount': 23,
+        'playlist_mincount': 18,
     }, {
         'url': 'https://www.zdf.de/dokumentation/planet-e',
         'info_dict': {
@@ -330,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE):
             'title': 'planet e.',
         },
         'playlist_mincount': 50,
+    }, {
+        'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
+        'info_dict': {
+            'id': 'aktenzeichen-xy-ungeloest',
+            'title': 'Aktenzeichen XY... ungelöst',
+            'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+        },
+        'playlist_mincount': 2,
     }, {
         'url': 'https://www.zdf.de/filme/taunuskrimi/',
         'only_matching': True,
@@ -339,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE):
     def suitable(cls, url):
         return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)

+    def _og_search_title(self, webpage, fatal=False):
+        title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
+        return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
+
     def _real_extract(self, url):
         channel_id = self._match_id(url)

         webpage = self._download_webpage(url, channel_id)

-        entries = [
-            self.url_result(item_url, ie=ZDFIE.ie_key())
-            for item_url in orderedSet(re.findall(
-                r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
-
-        return self.playlist_result(
-            entries, channel_id, self._og_search_title(webpage, fatal=False))
+        matches = re.finditer(
+            r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
+            webpage)
+
+        if self._downloader.params.get('noplaylist', False):
+            entry = next(
+                (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
+                None)
+            self.to_screen('Downloading just the main video because of --no-playlist')
+            if entry:
+                return entry
+        else:
+            self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))

-        r"""
-        player = self._extract_player(webpage, channel_id)
-
-        channel_id = self._search_regex(
-            r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
-            'channel id', group='id')
-
-        channel = self._call_api(
-            'https://api.zdf.de/content/documents/%s.json' % channel_id,
-            player, url, channel_id)
-
-        items = []
-        for module in channel['module']:
-            for teaser in try_get(module, lambda x: x['teaser'], list) or []:
-                t = try_get(
-                    teaser, lambda x: x['http://zdf.de/rels/target'], dict)
-                if not t:
-                    continue
-                items.extend(try_get(
-                    t,
-                    lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                    list) or [])
-            items.extend(try_get(
-                module,
-                lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
-                list) or [])
-
-        entries = []
-        entry_urls = set()
-        for item in items:
-            t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
-            if not t:
-                continue
-            sharing_url = t.get('http://zdf.de/rels/sharing-url')
-            if not sharing_url or not isinstance(sharing_url, compat_str):
-                continue
-            if sharing_url in entry_urls:
-                continue
-            entry_urls.add(sharing_url)
-            entries.append(self.url_result(
-                sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
-        return self.playlist_result(entries, channel_id, channel.get('title'))
-        """
+        def check_video(m):
+            v_ref = self._search_regex(
+                r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
+                webpage, 'check id', default='')
+            v_ref = extract_attributes(v_ref)
+            return v_ref.get('data-target-video-type') != 'novideo'
+
+        return self.playlist_from_matches(
+            (m.group('url') for m in matches if check_video(m)),
+            channel_id, self._og_search_title(webpage, fatal=False))
File diff suppressed because it is too large
@@ -801,7 +801,7 @@ def parseOpts(overrideArguments=None):
     postproc.add_option(
         '--postprocessor-args',
         dest='postprocessor_args', metavar='ARGS',
-        help='Give these arguments to the postprocessor')
+        help='Give these arguments to the postprocessor (if postprocessing is required)')
     postproc.add_option(
         '-k', '--keep-video',
         action='store_true', dest='keepvideo', default=False,
@@ -40,6 +40,8 @@ class MetadataFromTitlePP(PostProcessor):
                             % self._titleformat)
             return [], info
         for attribute, value in match.groupdict().items():
+            if value is None:
+                continue
             info[attribute] = value
             self._downloader.to_screen(
                 '[fromtitle] parsed %s: %s'
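For context on the two added lines: optional regex groups that did not participate in a match show up in groupdict() as None, and without the guard they would overwrite real metadata. A standalone illustration (not part of the diff; the pattern and title are invented):

import re

match = re.match(
    r'(?P<artist>.+?) - (?P<title>.+?)(?: \[(?P<year>\d{4})\])?$',
    'Some Artist - Some Song')
print(match.groupdict())
# {'artist': 'Some Artist', 'title': 'Some Song', 'year': None}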
@@ -33,6 +33,7 @@ import sys
 import tempfile
 import time
 import traceback
+import unicodedata
 import xml.etree.ElementTree
 import zlib

@@ -42,6 +43,7 @@ from .compat import (
     compat_HTTPError,
     compat_basestring,
     compat_chr,
+    compat_collections_abc,
     compat_cookiejar,
     compat_ctypes_WINFUNCTYPE,
     compat_etree_fromstring,
@@ -1684,6 +1686,7 @@ USER_AGENTS = {


 NO_DEFAULT = object()
+IDENTITY = lambda x: x

 ENGLISH_MONTH_NAMES = [
     'January', 'February', 'March', 'April', 'May', 'June',
@@ -1696,6 +1699,17 @@ MONTH_NAMES = {
     'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 }

+# Timezone names for RFC2822 obs-zone
+# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
+TIMEZONE_NAMES = {
+    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
+    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
+    'EST': -5, 'EDT': -4,  # Eastern
+    'CST': -6, 'CDT': -5,  # Central
+    'MST': -7, 'MDT': -6,  # Mountain
+    'PST': -8, 'PDT': -7   # Pacific
+}
+
 KNOWN_EXTENSIONS = (
     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
     'flv', 'f4v', 'f4a', 'f4b',
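The table is consumed by extract_timezone() further down; each name is just an hour offset that becomes a datetime.timedelta. For instance (standalone sketch, not part of the diff; assumes this branch's youtube_dl is importable):

import datetime

from youtube_dl.utils import TIMEZONE_NAMES

offset = datetime.timedelta(hours=TIMEZONE_NAMES['EST'])
print(offset)  # -1 day, 19:00:00, i.e. UTC-5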
@@ -1735,12 +1749,17 @@ DATE_FORMATS = (
     '%b %dth %Y %I:%M',
     '%Y %m %d',
     '%Y-%m-%d',
+    '%Y.%m.%d.',
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',
+    '%Y%m%d%H%M',
+    '%Y%m%d%H%M%S',
     '%Y%m%d',
     '%Y-%m-%d %H:%M',
     '%Y-%m-%d %H:%M:%S',
     '%Y-%m-%d %H:%M:%S.%f',
+    '%Y-%m-%d %H:%M:%S:%f',
     '%d.%m.%Y %H:%M',
+    '%d.%m.%Y %H.%M',
     '%Y-%m-%dT%H:%M:%SZ',
@@ -1753,6 +1772,7 @@ DATE_FORMATS = (
     '%b %d %Y at %H:%M:%S',
     '%B %d %Y at %H:%M',
     '%B %d %Y at %H:%M:%S',
+    '%H:%M %d-%b-%Y',
 )

 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
@@ -1763,6 +1783,7 @@ DATE_FORMATS_DAY_FIRST.extend([
     '%d/%m/%Y',
     '%d/%m/%y',
     '%d/%m/%Y %H:%M:%S',
+    '%d-%m-%Y %H:%M',
 ])

 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
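The new entries are ordinary strptime patterns; for example the compact forms parse like this (standalone sketch, not part of the diff):

import datetime

print(datetime.datetime.strptime('202206091730', '%Y%m%d%H%M'))
# 2022-06-09 17:30:00
print(datetime.datetime.strptime('2022.06.09.', '%Y.%m.%d.'))
# 2022-06-09 00:00:00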
@@ -2100,6 +2121,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
             return '_'
         return char

+    # Replace look-alike Unicode glyphs
+    if restricted and not is_id:
+        s = unicodedata.normalize('NFKC', s)
     # Handle timestamps
     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
     result = ''.join(map(replace_insane, s))
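What the NFKC pass buys in --restrict-filenames mode: glyphs with a Unicode compatibility mapping collapse to plain characters before the ASCII filter runs. For example (standalone sketch, not part of the diff):

import unicodedata

for s in (u'\ufb01lm', u'\u216b', u'\u2460'):  # 'fi' ligature, Roman numeral XII, circled 1
    print(s, '->', unicodedata.normalize('NFKC', s))
# prints: film, XII, 1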
@@ -2966,10 +2990,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):

 def extract_timezone(date_str):
     m = re.search(
-        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
-        date_str)
+        r'''(?x)
+            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
+            (?P<tz>Z|                                            # just the UTC Z, or
+               (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                    # preceded by 4 digits or hh:mm or
+                  (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))      # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+               [ ]?                                              # optional space
+               (?P<sign>\+|-)                                    # +/-
+               (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})        # hh[:]mm
+            $)
+        ''', date_str)
     if not m:
-        timezone = datetime.timedelta()
+        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
+        if timezone is not None:
+            date_str = date_str[:-len(m.group('tz'))]
+        timezone = datetime.timedelta(hours=timezone or 0)
     else:
         date_str = date_str[:-len(m.group('tz'))]
         if not m.group('sign'):
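To see what the tightened regex accepts: a numeric suffix like +0200 still matches, while a trailing alphabetic zone such as GMT no longer looks like an offset and instead falls through to the new TIMEZONE_NAMES branch. A standalone check (not part of the diff; the date strings are invented):

import re

TZ_RE = r'''(?x)
    ^.{8,}?
    (?P<tz>Z|
       (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|
          (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))
       [ ]?
       (?P<sign>\+|-)
       (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})
    $)
'''

for s in ('2022-06-09T17:00:00+0200', 'Sat, 09 Apr 2022 22:33:16 GMT'):
    m = re.search(TZ_RE, s)
    print(s, '->', m.group('tz') if m else None)
# +0200 is found; GMT yields None and is handled via the name table instead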
@@ -3037,7 +3073,8 @@ def unified_timestamp(date_str, day_first=True):
     if date_str is None:
         return None

-    date_str = re.sub(r'[,|]', '', date_str)
+    date_str = re.sub(r'\s+', ' ', re.sub(
+        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
     timezone, date_str = extract_timezone(date_str)
@@ -3063,7 +3100,7 @@ def unified_timestamp(date_str, day_first=True):
             pass
     timetuple = email.utils.parsedate_tz(date_str)
     if timetuple:
-        return calendar.timegm(timetuple) + pm_delta * 3600
+        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()


 def determine_ext(url, default_ext='unknown_video'):
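The new pre-cleaning also strips day-of-week words, not just commas and pipes, before the strptime formats are tried. A standalone illustration (not part of the diff):

import re

date_str = 'Saturday, 09 Apr 2022 22:33:16 GMT'
cleaned = re.sub(r'\s+', ' ', re.sub(
    r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
print(cleaned)  # ' 09 Apr 2022 22:33:16 GMT' (day name and comma are gone)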
@@ -3673,13 +3710,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
     if get_attr:
         if v is not None:
             v = getattr(v, get_attr, None)
-    if v == '':
-        v = None
-    if v is None:
+    if v in (None, ''):
         return default
     try:
         return int(v) * invscale // scale
-    except (ValueError, TypeError):
+    except (ValueError, TypeError, OverflowError):
         return default


@@ -3834,6 +3869,105 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
     return unrecognized


+class LazyList(compat_collections_abc.Sequence):
+    """Lazy immutable list from an iterable
+    Note that slices of a LazyList are lists and not LazyList"""
+
+    class IndexError(IndexError):
+        def __init__(self, cause=None):
+            if cause:
+                # reproduce `raise from`
+                self.__cause__ = cause
+            super(IndexError, self).__init__()
+
+    def __init__(self, iterable, **kwargs):
+        # kwarg-only
+        reverse = kwargs.get('reverse', False)
+        _cache = kwargs.get('_cache')
+
+        self._iterable = iter(iterable)
+        self._cache = [] if _cache is None else _cache
+        self._reversed = reverse
+
+    def __iter__(self):
+        if self._reversed:
+            # We need to consume the entire iterable to iterate in reverse
+            for item in self.exhaust():
+                yield item
+            return
+        for item in self._cache:
+            yield item
+        for item in self._iterable:
+            self._cache.append(item)
+            yield item
+
+    def _exhaust(self):
+        self._cache.extend(self._iterable)
+        self._iterable = []  # Discard the emptied iterable to make it pickle-able
+        return self._cache
+
+    def exhaust(self):
+        """Evaluate the entire iterable"""
+        return self._exhaust()[::-1 if self._reversed else 1]
+
+    @staticmethod
+    def _reverse_index(x):
+        return None if x is None else ~x
+
+    def __getitem__(self, idx):
+        if isinstance(idx, slice):
+            if self._reversed:
+                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
+            start, stop, step = idx.start, idx.stop, idx.step or 1
+        elif isinstance(idx, int):
+            if self._reversed:
+                idx = self._reverse_index(idx)
+            start, stop, step = idx, idx, 0
+        else:
+            raise TypeError('indices must be integers or slices')
+        if ((start or 0) < 0 or (stop or 0) < 0
+                or (start is None and step < 0)
+                or (stop is None and step > 0)):
+            # We need to consume the entire iterable to be able to slice from the end
+            # Obviously, never use this with infinite iterables
+            self._exhaust()
+            try:
+                return self._cache[idx]
+            except IndexError as e:
+                raise self.IndexError(e)
+        n = max(start or 0, stop or 0) - len(self._cache) + 1
+        if n > 0:
+            self._cache.extend(itertools.islice(self._iterable, n))
+        try:
+            return self._cache[idx]
+        except IndexError as e:
+            raise self.IndexError(e)
+
+    def __bool__(self):
+        try:
+            self[-1] if self._reversed else self[0]
+        except self.IndexError:
+            return False
+        return True
+
+    def __len__(self):
+        self._exhaust()
+        return len(self._cache)
+
+    def __reversed__(self):
+        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
+
+    def __copy__(self):
+        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
+
+    def __repr__(self):
+        # repr and str should mimic a list. So we exhaust the iterable
+        return repr(self.exhaust())
+
+    def __str__(self):
+        return repr(self.exhaust())
+
+
 class PagedList(object):
     def __len__(self):
         # This is only useful for tests
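Typical use of the new LazyList (standalone sketch, not part of the diff; assumes this branch's youtube_dl is importable): indexing consumes only as much of the underlying iterable as needed, and results are cached.

from youtube_dl.utils import LazyList

def gen():
    for i in range(5):
        print('producing', i)
        yield i

ll = LazyList(gen())
print(ll[1])     # consumes items 0-1 only
print(ll[1])     # served from the cache, no new "producing" output
print(list(ll))  # finishes consuming and yields all five items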
@@ -3941,7 +4075,8 @@ def escape_rfc3986(s):
     """Escape non-ASCII characters as suggested by RFC 3986"""
     if sys.version_info < (3, 0) and isinstance(s, compat_str):
         s = s.encode('utf-8')
-    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+    # ensure unicode: after quoting, it can always be converted
+    return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))


 def escape_url(url):
@@ -4058,6 +4193,10 @@ def multipart_encode(data, boundary=None):
     return out, content_type


+def variadic(x, allowed_types=(compat_str, bytes, dict)):
+    return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
+
+
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     if isinstance(key_or_keys, (list, tuple)):
         for key in key_or_keys:
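variadic() normalises "one value or a sequence of values" arguments: real iterables pass through, while scalars, including the listed scalar-like types str/bytes/dict, are wrapped in a 1-tuple. For example (standalone, assumes this branch's youtube_dl is importable):

from youtube_dl.utils import variadic

print(variadic('abc'))       # ('abc',)  - strings count as scalars
print(variadic(['a', 'b']))  # ['a', 'b'] - already a sequence
print(variadic({'k': 'v'}))  # ({'k': 'v'},) - dicts are treated as units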
@@ -4068,6 +4207,23 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
     return d.get(key_or_keys, default)


+def try_call(*funcs, **kwargs):
+
+    # parameter defaults
+    expected_type = kwargs.get('expected_type')
+    fargs = kwargs.get('args', [])
+    fkwargs = kwargs.get('kwargs', {})
+
+    for f in funcs:
+        try:
+            val = f(*fargs, **fkwargs)
+        except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
+            pass
+        else:
+            if expected_type is None or isinstance(val, expected_type):
+                return val
+
+
 def try_get(src, getter, expected_type=None):
     if not isinstance(getter, (list, tuple)):
         getter = [getter]
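try_call() returns the first candidate whose call neither raises one of the usual lookup/arithmetic errors nor fails the optional expected_type check. For example (standalone, assumes this branch's youtube_dl is importable; the sample dict is invented):

from youtube_dl.utils import try_call

data = {'views': '123'}
value = try_call(
    lambda: int(data['view_count']),  # KeyError - skipped
    lambda: int(data['views']),       # returns 123
    expected_type=int)
print(value)  # 123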
@@ -5801,3 +5957,220 @@ def clean_podcast_url(url):
             st\.fm # https://podsights.com/docs/
         )/e
     )/''', '', url)
+
+
+def traverse_obj(obj, *paths, **kwargs):
+    """
+    Safely traverse nested `dict`s and `Sequence`s
+
+    >>> obj = [{}, {"key": "value"}]
+    >>> traverse_obj(obj, (1, "key"))
+    "value"
+
+    Each of the provided `paths` is tested and the first producing a valid result will be returned.
+    The next path will also be tested if the path branched but no results could be found.
+    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
+    A value of None is treated as the absence of a value.
+
+    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
+
+    The keys in the path can be one of:
+        - `None`: Return the current object.
+        - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
+        - `slice`: Branch out and return all values in `obj[key]`.
+        - `Ellipsis`: Branch out and return a list of all values.
+        - `tuple`/`list`: Branch out and return a list of all matching values.
+          Read as: `[traverse_obj(obj, branch) for branch in branches]`.
+        - `function`: Branch out and return values filtered by the function.
+          Read as: `[value for key, value in obj if function(key, value)]`.
+          For `Sequence`s, `key` is the index of the value.
+        - `dict`: Transform the current object and return a matching dict.
+          Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
+
+    `tuple`, `list`, and `dict` all support nested paths and branches.
+
+    @params paths           Paths which to traverse by.
+    Keyword arguments:
+    @param default          Value to return if the paths do not match.
+    @param expected_type    If a `type`, only accept final values of this type.
+                            If any other callable, try to call the function on each result.
+    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
+    @param casesense        If `False`, consider string dictionary keys as case insensitive.
+
+    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
+
+    @param _is_user_input    Whether the keys are generated from user input.
+                             If `True` strings get converted to `int`/`slice` if needed.
+    @param _traverse_string  Whether to traverse into objects as strings.
+                             If `True`, any non-compatible object will first be
+                             converted into a string and then traversed into.
+
+    @returns                The result of the object traversal.
+                            If successful, `get_all=True`, and the path branches at least once,
+                            then a list of results is returned instead.
+                            A list is always returned if the last path branches and no `default` is given.
+    """
+
+    # parameter defaults
+    default = kwargs.get('default', NO_DEFAULT)
+    expected_type = kwargs.get('expected_type')
+    get_all = kwargs.get('get_all', True)
+    casesense = kwargs.get('casesense', True)
+    _is_user_input = kwargs.get('_is_user_input', False)
+    _traverse_string = kwargs.get('_traverse_string', False)
+
+    # instant compat
+    str = compat_str
+
+    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
+    # stand-in until compat_re_Match is added
+    compat_re_Match = type(re.match('a', 'a'))
+    # stand-in until casefold.py is added
+    try:
+        ''.casefold()
+        compat_casefold = lambda s: s.casefold()
+    except AttributeError:
+        compat_casefold = lambda s: s.lower()
+    casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
+
+    if isinstance(expected_type, type):
+        type_test = lambda val: val if isinstance(val, expected_type) else None
+    else:
+        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))
+
+    def from_iterable(iterables):
+        # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
+        for it in iterables:
+            for item in it:
+                yield item
+
+    def apply_key(key, obj):
+        if obj is None:
+            return
+
+        elif key is None:
+            yield obj
+
+        elif isinstance(key, (list, tuple)):
+            for branch in key:
+                _, result = apply_path(obj, branch)
+                for item in result:
+                    yield item
+
+        elif key is Ellipsis:
+            result = []
+            if isinstance(obj, compat_collections_abc.Mapping):
+                result = obj.values()
+            elif is_sequence(obj):
+                result = obj
+            elif isinstance(obj, compat_re_Match):
+                result = obj.groups()
+            elif _traverse_string:
+                result = str(obj)
+            for item in result:
+                yield item
+
+        elif callable(key):
+            if is_sequence(obj):
+                iter_obj = enumerate(obj)
+            elif isinstance(obj, compat_collections_abc.Mapping):
+                iter_obj = obj.items()
+            elif isinstance(obj, compat_re_Match):
+                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
+            elif _traverse_string:
+                iter_obj = enumerate(str(obj))
+            else:
+                return
+            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
+                yield item
+
+        elif isinstance(key, dict):
+            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
+            yield dict((k, v if v is not None else default) for k, v in iter_obj
+                       if v is not None or default is not NO_DEFAULT)
+
+        elif isinstance(obj, compat_collections_abc.Mapping):
+            yield (obj.get(key) if casesense or (key in obj)
+                   else next((v for k, v in obj.items() if casefold(k) == key), None))
+
+        elif isinstance(obj, compat_re_Match):
+            if isinstance(key, int) or casesense:
+                try:
+                    yield obj.group(key)
+                    return
+                except IndexError:
+                    pass
+            if not isinstance(key, str):
+                return
+
+            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
+
+        else:
+            if _is_user_input:
+                key = (int_or_none(key) if ':' not in key
+                       else slice(*map(int_or_none, key.split(':'))))
+
+            if not isinstance(key, (int, slice)):
+                return
+
+            if not is_sequence(obj):
+                if not _traverse_string:
+                    return
+                obj = str(obj)
+
+            try:
+                yield obj[key]
+            except IndexError:
+                pass
+
+    def apply_path(start_obj, path):
+        objs = (start_obj,)
+        has_branched = False
+
+        for key in variadic(path):
+            if _is_user_input and key == ':':
+                key = Ellipsis
+
+            if not casesense and isinstance(key, str):
+                key = compat_casefold(key)
+
+            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
+                has_branched = True
+
+            key_func = functools.partial(apply_key, key)
+            objs = from_iterable(map(key_func, objs))
+
+        return has_branched, objs
+
+    def _traverse_obj(obj, path, use_list=True):
+        has_branched, results = apply_path(obj, path)
+        results = LazyList(x for x in map(type_test, results) if x is not None)
+
+        if get_all and has_branched:
+            return results.exhaust() if results or use_list else None
+
+        return results[0] if results else None
+
+    for index, path in enumerate(paths, 1):
+        use_list = default is NO_DEFAULT and index == len(paths)
+        result = _traverse_obj(obj, path, use_list)
+        if result is not None:
+            return result
+
+    return None if default is NO_DEFAULT else default
+
+
+def get_first(obj, keys, **kwargs):
+    return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs)
+
+
+def join_nonempty(*values, **kwargs):
+
+    # parameter defaults
+    delim = kwargs.get('delim', '-')
+    from_dict = kwargs.get('from_dict')
+
+    if from_dict is not None:
+        values = (traverse_obj(from_dict, variadic(v)) for v in values)
+    return delim.join(map(compat_str, filter(None, values)))