mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-05-11 08:14:50 +09:00
Compare commits
18 Commits
aa7e27f935
...
b7ec769e97
Author | SHA1 | Date | |
---|---|---|---|
![]() |
b7ec769e97 | ||
![]() |
c052a16f72 | ||
![]() |
bd2ded59f2 | ||
![]() |
16b7e97afa | ||
![]() |
d21717978c | ||
![]() |
7513413794 | ||
![]() |
67dbfa65f2 | ||
![]() |
6eb6d6dff5 | ||
![]() |
6c40d9f847 | ||
![]() |
1b08d3281d | ||
![]() |
32b8d31780 | ||
![]() |
570b868078 | ||
![]() |
2190e89260 | ||
![]() |
7e136639db | ||
![]() |
cedeeed56f | ||
![]() |
add4622870 | ||
![]() |
9a6ddece4d | ||
![]() |
360104ec18 |
@ -934,6 +934,7 @@
|
|||||||
- **TeleBruxelles**
|
- **TeleBruxelles**
|
||||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||||
- **Telegraaf**
|
- **Telegraaf**
|
||||||
|
- **Telegram**
|
||||||
- **TeleMB**
|
- **TeleMB**
|
||||||
- **TeleQuebec**
|
- **TeleQuebec**
|
||||||
- **TeleQuebecEmission**
|
- **TeleQuebecEmission**
|
||||||
|
@ -63,7 +63,7 @@ class TestCache(unittest.TestCase):
|
|||||||
obj = {'x': 1, 'y': ['ä', '\\a', True]}
|
obj = {'x': 1, 'y': ['ä', '\\a', True]}
|
||||||
c.store('test_cache', 'k.', obj)
|
c.store('test_cache', 'k.', obj)
|
||||||
self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
|
self.assertEqual(c.load('test_cache', 'k.', min_ver='1970.01.01'), obj)
|
||||||
new_version = '.'.join(('%d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
|
new_version = '.'.join(('%0.2d' % ((v + 1) if i == 0 else v, )) for i, v in enumerate(version_tuple(__version__)))
|
||||||
self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
|
self.assertIs(c.load('test_cache', 'k.', min_ver=new_version), None)
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ from __future__ import unicode_literals
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
import math
|
import math
|
||||||
@ -146,6 +147,25 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
# https://github.com/ytdl-org/youtube-dl/issues/32815
|
# https://github.com/ytdl-org/youtube-dl/issues/32815
|
||||||
self._test('function f(){return 0 - 7 * - 6;}', 42)
|
self._test('function f(){return 0 - 7 * - 6;}', 42)
|
||||||
|
|
||||||
|
def test_bitwise_operators_typecast(self):
|
||||||
|
# madness
|
||||||
|
self._test('function f(){return null << 5}', 0)
|
||||||
|
self._test('function f(){return undefined >> 5}', 0)
|
||||||
|
self._test('function f(){return 42 << NaN}', 42)
|
||||||
|
self._test('function f(){return 42 << Infinity}', 42)
|
||||||
|
self._test('function f(){return 0.0 << null}', 0)
|
||||||
|
self._test('function f(){return NaN << 42}', 0)
|
||||||
|
self._test('function f(){return "21.9" << 1}', 42)
|
||||||
|
self._test('function f(){return true << "5";}', 32)
|
||||||
|
self._test('function f(){return true << true;}', 2)
|
||||||
|
self._test('function f(){return "19" & "21.9";}', 17)
|
||||||
|
self._test('function f(){return "19" & false;}', 0)
|
||||||
|
self._test('function f(){return "11.0" >> "2.1";}', 2)
|
||||||
|
self._test('function f(){return 5 ^ 9;}', 12)
|
||||||
|
self._test('function f(){return 0.0 << NaN}', 0)
|
||||||
|
self._test('function f(){return null << undefined}', 0)
|
||||||
|
self._test('function f(){return 21 << 4294967297}', 42)
|
||||||
|
|
||||||
def test_array_access(self):
|
def test_array_access(self):
|
||||||
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
||||||
|
|
||||||
@ -160,6 +180,7 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
|
self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31)
|
||||||
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51)
|
||||||
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11)
|
||||||
|
self._test('function f(){var x = 2; var y = ["a", "b"]; y[x%y["length"]]="z"; return y}', ['z', 'b'])
|
||||||
|
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
self._test('''
|
self._test('''
|
||||||
@ -351,6 +372,13 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
|
self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7)
|
||||||
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
|
self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5)
|
||||||
|
|
||||||
|
def test_not(self):
|
||||||
|
self._test('function f() { return ! undefined; }', True)
|
||||||
|
self._test('function f() { return !0; }', True)
|
||||||
|
self._test('function f() { return !!0; }', False)
|
||||||
|
self._test('function f() { return ![]; }', False)
|
||||||
|
self._test('function f() { return !0 !== false; }', True)
|
||||||
|
|
||||||
def test_void(self):
|
def test_void(self):
|
||||||
self._test('function f() { return void 42; }', JS_Undefined)
|
self._test('function f() { return void 42; }', JS_Undefined)
|
||||||
|
|
||||||
@ -435,6 +463,7 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
|
|
||||||
def test_regex(self):
|
def test_regex(self):
|
||||||
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
self._test('function f() { let a=/,,[/,913,/](,)}/; }', None)
|
||||||
|
self._test('function f() { let a=/,,[/,913,/](,)}/; return a.source; }', ',,[/,913,/](,)}')
|
||||||
|
|
||||||
jsi = JSInterpreter('''
|
jsi = JSInterpreter('''
|
||||||
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
function x() { let a=/,,[/,913,/](,)}/; "".replace(a, ""); return a; }
|
||||||
@ -482,25 +511,6 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test('function f(){return -524999584 << 5}', 379882496)
|
self._test('function f(){return -524999584 << 5}', 379882496)
|
||||||
self._test('function f(){return 1236566549 << 5}', 915423904)
|
self._test('function f(){return 1236566549 << 5}', 915423904)
|
||||||
|
|
||||||
def test_bitwise_operators_typecast(self):
|
|
||||||
# madness
|
|
||||||
self._test('function f(){return null << 5}', 0)
|
|
||||||
self._test('function f(){return undefined >> 5}', 0)
|
|
||||||
self._test('function f(){return 42 << NaN}', 42)
|
|
||||||
self._test('function f(){return 42 << Infinity}', 42)
|
|
||||||
self._test('function f(){return 0.0 << null}', 0)
|
|
||||||
self._test('function f(){return NaN << 42}', 0)
|
|
||||||
self._test('function f(){return "21.9" << 1}', 42)
|
|
||||||
self._test('function f(){return 21 << 4294967297}', 42)
|
|
||||||
self._test('function f(){return true << "5";}', 32)
|
|
||||||
self._test('function f(){return true << true;}', 2)
|
|
||||||
self._test('function f(){return "19" & "21.9";}', 17)
|
|
||||||
self._test('function f(){return "19" & false;}', 0)
|
|
||||||
self._test('function f(){return "11.0" >> "2.1";}', 2)
|
|
||||||
self._test('function f(){return 5 ^ 9;}', 12)
|
|
||||||
self._test('function f(){return 0.0 << NaN}', 0)
|
|
||||||
self._test('function f(){return null << undefined}', 0)
|
|
||||||
|
|
||||||
def test_negative(self):
|
def test_negative(self):
|
||||||
self._test('function f(){return 2 * -2.0 ;}', -4)
|
self._test('function f(){return 2 * -2.0 ;}', -4)
|
||||||
self._test('function f(){return 2 - - -2 ;}', 0)
|
self._test('function f(){return 2 - - -2 ;}', 0)
|
||||||
@ -543,6 +553,8 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
test_result = list('test')
|
test_result = list('test')
|
||||||
tests = [
|
tests = [
|
||||||
'function f(a, b){return a.split(b)}',
|
'function f(a, b){return a.split(b)}',
|
||||||
|
'function f(a, b){return a["split"](b)}',
|
||||||
|
'function f(a, b){let x = ["split"]; return a[x[0]](b)}',
|
||||||
'function f(a, b){return String.prototype.split.call(a, b)}',
|
'function f(a, b){return String.prototype.split.call(a, b)}',
|
||||||
'function f(a, b){return String.prototype.split.apply(a, [b])}',
|
'function f(a, b){return String.prototype.split.apply(a, [b])}',
|
||||||
]
|
]
|
||||||
@ -593,6 +605,9 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
self._test('function f(){return "012345678".slice(-1, 1)}', '')
|
||||||
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
|
||||||
|
|
||||||
|
def test_splice(self):
|
||||||
|
self._test('function f(){var T = ["0", "1", "2"]; T["splice"](2, 1, "0")[0]; return T }', ['0', '1', '0'])
|
||||||
|
|
||||||
def test_pop(self):
|
def test_pop(self):
|
||||||
# pop
|
# pop
|
||||||
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
|
self._test('function f(){var a = [0, 1, 2, 3, 4, 5, 6, 7, 8]; return [a.pop(), a]}',
|
||||||
@ -627,6 +642,16 @@ class TestJSInterpreter(unittest.TestCase):
|
|||||||
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
'return [ret.length, ret[0][0], ret[1][1], ret[0][2]]}',
|
||||||
[2, 4, 1, [4, 2]])
|
[2, 4, 1, [4, 2]])
|
||||||
|
|
||||||
|
def test_extract_function(self):
|
||||||
|
jsi = JSInterpreter('function a(b) { return b + 1; }')
|
||||||
|
func = jsi.extract_function('a')
|
||||||
|
self.assertEqual(func([2]), 3)
|
||||||
|
|
||||||
|
def test_extract_function_with_global_stack(self):
|
||||||
|
jsi = JSInterpreter('function c(d) { return d + e + f + g; }')
|
||||||
|
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
|
||||||
|
self.assertEqual(func([1]), 1111)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -94,11 +94,51 @@ _SIG_TESTS = [
|
|||||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/363db69b/player_ias_tce.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpz2ICs6EVdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/4fcd6e4a/player_ias.vflset/en_US/base.js',
|
||||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/4fcd6e4a/player_ias_tce.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'wAOAOq0QJ8ARAIgXmPlOPSBkkUs1bYFYlJCfe29xx8q7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/player_ias.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/player_ias_tce.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'7AOq0QJ8wRAIgXmPlOPSBkkAs1bYFYlJCfe29xx8jOv1pDL0Q2bdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0qaw',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||||
|
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||||
|
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
_NSIG_TESTS = [
|
_NSIG_TESTS = [
|
||||||
@ -272,7 +312,7 @@ _NSIG_TESTS = [
|
|||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
|
||||||
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
|
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/363db69b/player_ias.vflset/en_US/base.js',
|
||||||
@ -286,6 +326,26 @@ _NSIG_TESTS = [
|
|||||||
'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
|
'https://www.youtube.com/s/player/4fcd6e4a/tv-player-ias.vflset/tv-player-ias.js',
|
||||||
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
'o_L251jm8yhZkWtBW', 'lXoxI3XvToqn6A',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/tv-player-ias.vflset/tv-player-ias.js',
|
||||||
|
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/player-plasma-ias-phone-en_US.vflset/base.js',
|
||||||
|
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20830619/player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||||
|
'ir9-V6cdbCiyKxhr', '9YE85kNjZiS4',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/8a8ac953/player_ias_tce.vflset/en_US/base.js',
|
||||||
|
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||||
|
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -335,7 +395,7 @@ def t_factory(name, sig_func, url_pattern):
|
|||||||
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
|
test_id = re.sub(r'[/.-]', '_', m.group('id') or m.group('compat_id'))
|
||||||
|
|
||||||
def test_func(self):
|
def test_func(self):
|
||||||
basename = 'player-{0}-{1}.js'.format(name, test_id)
|
basename = 'player-{0}.js'.format(test_id)
|
||||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||||
|
|
||||||
if not os.path.exists(fn):
|
if not os.path.exists(fn):
|
||||||
|
@ -540,10 +540,14 @@ class YoutubeDL(object):
|
|||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
return self.to_stdout(message, skip_eol, check_quiet=True)
|
return self.to_stdout(message, skip_eol, check_quiet=True)
|
||||||
|
|
||||||
def _write_string(self, s, out=None):
|
def _write_string(self, s, out=None, only_once=False, _cache=set()):
|
||||||
|
if only_once and s in _cache:
|
||||||
|
return
|
||||||
write_string(s, out=out, encoding=self.params.get('encoding'))
|
write_string(s, out=out, encoding=self.params.get('encoding'))
|
||||||
|
if only_once:
|
||||||
|
_cache.add(s)
|
||||||
|
|
||||||
def to_stdout(self, message, skip_eol=False, check_quiet=False):
|
def to_stdout(self, message, skip_eol=False, check_quiet=False, only_once=False):
|
||||||
"""Print message to stdout if not in quiet mode."""
|
"""Print message to stdout if not in quiet mode."""
|
||||||
if self.params.get('logger'):
|
if self.params.get('logger'):
|
||||||
self.params['logger'].debug(message)
|
self.params['logger'].debug(message)
|
||||||
@ -552,9 +556,9 @@ class YoutubeDL(object):
|
|||||||
terminator = ['\n', ''][skip_eol]
|
terminator = ['\n', ''][skip_eol]
|
||||||
output = message + terminator
|
output = message + terminator
|
||||||
|
|
||||||
self._write_string(output, self._screen_file)
|
self._write_string(output, self._screen_file, only_once=only_once)
|
||||||
|
|
||||||
def to_stderr(self, message):
|
def to_stderr(self, message, only_once=False):
|
||||||
"""Print message to stderr."""
|
"""Print message to stderr."""
|
||||||
assert isinstance(message, compat_str)
|
assert isinstance(message, compat_str)
|
||||||
if self.params.get('logger'):
|
if self.params.get('logger'):
|
||||||
@ -562,7 +566,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
message = self._bidi_workaround(message)
|
message = self._bidi_workaround(message)
|
||||||
output = message + '\n'
|
output = message + '\n'
|
||||||
self._write_string(output, self._err_file)
|
self._write_string(output, self._err_file, only_once=only_once)
|
||||||
|
|
||||||
def to_console_title(self, message):
|
def to_console_title(self, message):
|
||||||
if not self.params.get('consoletitle', False):
|
if not self.params.get('consoletitle', False):
|
||||||
@ -641,18 +645,11 @@ class YoutubeDL(object):
|
|||||||
raise DownloadError(message, exc_info)
|
raise DownloadError(message, exc_info)
|
||||||
self._download_retcode = 1
|
self._download_retcode = 1
|
||||||
|
|
||||||
def report_warning(self, message, only_once=False, _cache={}):
|
def report_warning(self, message, only_once=False):
|
||||||
'''
|
'''
|
||||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||||
If stderr is a tty file the 'WARNING:' will be colored
|
If stderr is a tty file the 'WARNING:' will be colored
|
||||||
'''
|
'''
|
||||||
if only_once:
|
|
||||||
m_hash = hash((self, message))
|
|
||||||
m_cnt = _cache.setdefault(m_hash, 0)
|
|
||||||
_cache[m_hash] = m_cnt + 1
|
|
||||||
if m_cnt > 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
if self.params.get('logger') is not None:
|
if self.params.get('logger') is not None:
|
||||||
self.params['logger'].warning(message)
|
self.params['logger'].warning(message)
|
||||||
else:
|
else:
|
||||||
@ -663,7 +660,7 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
_msg_header = 'WARNING:'
|
_msg_header = 'WARNING:'
|
||||||
warning_message = '%s %s' % (_msg_header, message)
|
warning_message = '%s %s' % (_msg_header, message)
|
||||||
self.to_stderr(warning_message)
|
self.to_stderr(warning_message, only_once=only_once)
|
||||||
|
|
||||||
def report_error(self, message, *args, **kwargs):
|
def report_error(self, message, *args, **kwargs):
|
||||||
'''
|
'''
|
||||||
@ -677,6 +674,16 @@ class YoutubeDL(object):
|
|||||||
kwargs['message'] = '%s %s' % (_msg_header, message)
|
kwargs['message'] = '%s %s' % (_msg_header, message)
|
||||||
self.trouble(*args, **kwargs)
|
self.trouble(*args, **kwargs)
|
||||||
|
|
||||||
|
def write_debug(self, message, only_once=False):
|
||||||
|
'''Log debug message or Print message to stderr'''
|
||||||
|
if not self.params.get('verbose', False):
|
||||||
|
return
|
||||||
|
message = '[debug] {0}'.format(message)
|
||||||
|
if self.params.get('logger'):
|
||||||
|
self.params['logger'].debug(message)
|
||||||
|
else:
|
||||||
|
self.to_stderr(message, only_once)
|
||||||
|
|
||||||
def report_unscoped_cookies(self, *args, **kwargs):
|
def report_unscoped_cookies(self, *args, **kwargs):
|
||||||
# message=None, tb=False, is_error=False
|
# message=None, tb=False, is_error=False
|
||||||
if len(args) <= 2:
|
if len(args) <= 2:
|
||||||
@ -2514,7 +2521,7 @@ class YoutubeDL(object):
|
|||||||
self.get_encoding()))
|
self.get_encoding()))
|
||||||
write_string(encoding_str, encoding=None)
|
write_string(encoding_str, encoding=None)
|
||||||
|
|
||||||
writeln_debug = lambda *s: self._write_string('[debug] %s\n' % (''.join(s), ))
|
writeln_debug = lambda *s: self.write_debug(''.join(s))
|
||||||
writeln_debug('youtube-dl version ', __version__)
|
writeln_debug('youtube-dl version ', __version__)
|
||||||
if _LAZY_LOADER:
|
if _LAZY_LOADER:
|
||||||
writeln_debug('Lazy loading extractors enabled')
|
writeln_debug('Lazy loading extractors enabled')
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import errno
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -8,14 +8,17 @@ import shutil
|
|||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from .compat import (
|
from .compat import (
|
||||||
|
compat_contextlib_suppress,
|
||||||
compat_getenv,
|
compat_getenv,
|
||||||
compat_open as open,
|
compat_open as open,
|
||||||
|
compat_os_makedirs,
|
||||||
)
|
)
|
||||||
from .utils import (
|
from .utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
|
escape_rfc3986,
|
||||||
expand_path,
|
expand_path,
|
||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
try_get,
|
traverse_obj,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
)
|
)
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
@ -30,23 +33,35 @@ class Cache(object):
|
|||||||
def __init__(self, ydl):
|
def __init__(self, ydl):
|
||||||
self._ydl = ydl
|
self._ydl = ydl
|
||||||
|
|
||||||
|
def _write_debug(self, *args, **kwargs):
|
||||||
|
self._ydl.write_debug(*args, **kwargs)
|
||||||
|
|
||||||
|
def _report_warning(self, *args, **kwargs):
|
||||||
|
self._ydl.report_warning(*args, **kwargs)
|
||||||
|
|
||||||
|
def _to_screen(self, *args, **kwargs):
|
||||||
|
self._ydl.to_screen(*args, **kwargs)
|
||||||
|
|
||||||
|
def _get_param(self, k, default=None):
|
||||||
|
return self._ydl.params.get(k, default)
|
||||||
|
|
||||||
def _get_root_dir(self):
|
def _get_root_dir(self):
|
||||||
res = self._ydl.params.get('cachedir')
|
res = self._get_param('cachedir')
|
||||||
if res is None:
|
if res is None:
|
||||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||||
res = os.path.join(cache_root, self._YTDL_DIR)
|
res = os.path.join(cache_root, self._YTDL_DIR)
|
||||||
return expand_path(res)
|
return expand_path(res)
|
||||||
|
|
||||||
def _get_cache_fn(self, section, key, dtype):
|
def _get_cache_fn(self, section, key, dtype):
|
||||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
|
assert re.match(r'^[\w.-]+$', section), \
|
||||||
'invalid section %r' % section
|
'invalid section %r' % section
|
||||||
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
|
key = escape_rfc3986(key, safe='').replace('%', ',') # encode non-ascii characters
|
||||||
return os.path.join(
|
return os.path.join(
|
||||||
self._get_root_dir(), section, '%s.%s' % (key, dtype))
|
self._get_root_dir(), section, '%s.%s' % (key, dtype))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def enabled(self):
|
def enabled(self):
|
||||||
return self._ydl.params.get('cachedir') is not False
|
return self._get_param('cachedir') is not False
|
||||||
|
|
||||||
def store(self, section, key, data, dtype='json'):
|
def store(self, section, key, data, dtype='json'):
|
||||||
assert dtype in ('json',)
|
assert dtype in ('json',)
|
||||||
@ -56,61 +71,55 @@ class Cache(object):
|
|||||||
|
|
||||||
fn = self._get_cache_fn(section, key, dtype)
|
fn = self._get_cache_fn(section, key, dtype)
|
||||||
try:
|
try:
|
||||||
try:
|
compat_os_makedirs(os.path.dirname(fn), exist_ok=True)
|
||||||
os.makedirs(os.path.dirname(fn))
|
self._write_debug('Saving {section}.{key} to cache'.format(section=section, key=key))
|
||||||
except OSError as ose:
|
|
||||||
if ose.errno != errno.EEXIST:
|
|
||||||
raise
|
|
||||||
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
||||||
except Exception:
|
except Exception:
|
||||||
tb = traceback.format_exc()
|
tb = traceback.format_exc()
|
||||||
self._ydl.report_warning(
|
self._report_warning('Writing cache to {fn!r} failed: {tb}'.format(fn=fn, tb=tb))
|
||||||
'Writing cache to %r failed: %s' % (fn, tb))
|
|
||||||
|
|
||||||
def _validate(self, data, min_ver):
|
def _validate(self, data, min_ver):
|
||||||
version = try_get(data, lambda x: x[self._VERSION_KEY])
|
version = traverse_obj(data, self._VERSION_KEY)
|
||||||
if not version: # Backward compatibility
|
if not version: # Backward compatibility
|
||||||
data, version = {'data': data}, self._DEFAULT_VERSION
|
data, version = {'data': data}, self._DEFAULT_VERSION
|
||||||
if not is_outdated_version(version, min_ver or '0', assume_new=False):
|
if not is_outdated_version(version, min_ver or '0', assume_new=False):
|
||||||
return data['data']
|
return data['data']
|
||||||
self._ydl.to_screen(
|
self._write_debug('Discarding old cache from version {version} (needs {min_ver})'.format(version=version, min_ver=min_ver))
|
||||||
'Discarding old cache from version {version} (needs {min_ver})'.format(**locals()))
|
|
||||||
|
|
||||||
def load(self, section, key, dtype='json', default=None, min_ver=None):
|
def load(self, section, key, dtype='json', default=None, **kw_min_ver):
|
||||||
assert dtype in ('json',)
|
assert dtype in ('json',)
|
||||||
|
min_ver = kw_min_ver.get('min_ver')
|
||||||
|
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
return default
|
return default
|
||||||
|
|
||||||
cache_fn = self._get_cache_fn(section, key, dtype)
|
cache_fn = self._get_cache_fn(section, key, dtype)
|
||||||
try:
|
with compat_contextlib_suppress(IOError): # If no cache available
|
||||||
try:
|
try:
|
||||||
with open(cache_fn, 'r', encoding='utf-8') as cachef:
|
with open(cache_fn, encoding='utf-8') as cachef:
|
||||||
|
self._write_debug('Loading {section}.{key} from cache'.format(section=section, key=key), only_once=True)
|
||||||
return self._validate(json.load(cachef), min_ver)
|
return self._validate(json.load(cachef), min_ver)
|
||||||
except ValueError:
|
except (ValueError, KeyError):
|
||||||
try:
|
try:
|
||||||
file_size = os.path.getsize(cache_fn)
|
file_size = os.path.getsize(cache_fn)
|
||||||
except (OSError, IOError) as oe:
|
except (OSError, IOError) as oe:
|
||||||
file_size = error_to_compat_str(oe)
|
file_size = error_to_compat_str(oe)
|
||||||
self._ydl.report_warning(
|
self._report_warning('Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||||
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
|
||||||
except IOError:
|
|
||||||
pass # No cache available
|
|
||||||
|
|
||||||
return default
|
return default
|
||||||
|
|
||||||
def remove(self):
|
def remove(self):
|
||||||
if not self.enabled:
|
if not self.enabled:
|
||||||
self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
self._to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
|
||||||
return
|
return
|
||||||
|
|
||||||
cachedir = self._get_root_dir()
|
cachedir = self._get_root_dir()
|
||||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
raise Exception('Not removing directory %s - this does not look like a cache dir' % (cachedir,))
|
||||||
|
|
||||||
self._ydl.to_screen(
|
self._to_screen(
|
||||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
'Removing cache dir %s .' % (cachedir,), skip_eol=True, ),
|
||||||
if os.path.exists(cachedir):
|
if os.path.exists(cachedir):
|
||||||
self._ydl.to_screen('.', skip_eol=True)
|
self._to_screen('.', skip_eol=True)
|
||||||
shutil.rmtree(cachedir)
|
shutil.rmtree(cachedir)
|
||||||
self._ydl.to_screen('.')
|
self._to_screen('.')
|
||||||
|
@ -2498,8 +2498,7 @@ try:
|
|||||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
_asciire = getattr(compat_urllib_parse, '_asciire', None) or re.compile(r'([\x00-\x7f]+)')
|
||||||
else re.compile(r'([\x00-\x7f]+)'))
|
|
||||||
|
|
||||||
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
|
||||||
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
# implementations from cpython 3.4.3's stdlib. Python 2's version
|
||||||
@ -2567,24 +2566,21 @@ except ImportError: # Python 2
|
|||||||
# Possible solutions are to either port it from python 3 with all
|
# Possible solutions are to either port it from python 3 with all
|
||||||
# the friends or manually ensure input query contains only byte strings.
|
# the friends or manually ensure input query contains only byte strings.
|
||||||
# We will stick with latter thus recursively encoding the whole query.
|
# We will stick with latter thus recursively encoding the whole query.
|
||||||
def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
|
def compat_urllib_parse_urlencode(query, doseq=0, safe='', encoding='utf-8', errors='strict'):
|
||||||
|
|
||||||
def encode_elem(e):
|
def encode_elem(e):
|
||||||
if isinstance(e, dict):
|
if isinstance(e, dict):
|
||||||
e = encode_dict(e)
|
e = encode_dict(e)
|
||||||
elif isinstance(e, (list, tuple,)):
|
elif isinstance(e, (list, tuple,)):
|
||||||
list_e = encode_list(e)
|
e = type(e)(encode_elem(el) for el in e)
|
||||||
e = tuple(list_e) if isinstance(e, tuple) else list_e
|
|
||||||
elif isinstance(e, compat_str):
|
elif isinstance(e, compat_str):
|
||||||
e = e.encode(encoding)
|
e = e.encode(encoding, errors)
|
||||||
return e
|
return e
|
||||||
|
|
||||||
def encode_dict(d):
|
def encode_dict(d):
|
||||||
return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
|
return tuple((encode_elem(k), encode_elem(v)) for k, v in d.items())
|
||||||
|
|
||||||
def encode_list(l):
|
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq).decode('ascii')
|
||||||
return [encode_elem(e) for e in l]
|
|
||||||
|
|
||||||
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
|
|
||||||
|
|
||||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||||
# Python 2's version is apparently totally broken
|
# Python 2's version is apparently totally broken
|
||||||
@ -2639,6 +2635,57 @@ except ImportError: # Python 2
|
|||||||
('parse_qs', compat_parse_qs)):
|
('parse_qs', compat_parse_qs)):
|
||||||
setattr(compat_urllib_parse, name, fix)
|
setattr(compat_urllib_parse, name, fix)
|
||||||
|
|
||||||
|
try:
|
||||||
|
all(chr(i) in b'' for i in range(256))
|
||||||
|
except TypeError:
|
||||||
|
# not all chr(i) are str: patch Python2 quote
|
||||||
|
|
||||||
|
_safemaps = getattr(compat_urllib_parse, '_safemaps', {})
|
||||||
|
_always_safe = frozenset(compat_urllib_parse.always_safe)
|
||||||
|
|
||||||
|
def _quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Percent-encode every character of `s` that is not safe.

    A character is kept as-is only when its code is below 128 and it is
    either in the module's always-safe set or in `safe`; any other
    character is replaced by '%XX' (two upper-case hex digits).

    The per-character translation table is cached in `_safemaps`, keyed on
    the requested safe set, so it is built once per distinct `safe`.

    An empty (but not None) `s` is returned unchanged.
    """

    if not s and s is not None:  # fast path
        return s
    extra_safe = frozenset(safe)
    # Compute the full safe set on every call, not only on a cache miss,
    # so that the "nothing to quote" shortcut below behaves the same on the
    # first call and on later (cached) calls with the same arguments.
    all_safe = _always_safe | extra_safe
    cachekey = (extra_safe, _always_safe)
    try:
        safe_map = _safemaps[cachekey]
    except KeyError:
        safe_map = {}
        for i in range(256):
            c = chr(i)
            safe_map[c] = (
                c if (i < 128 and c in all_safe)
                else b'%{0:02X}'.format(i))
        _safemaps[cachekey] = safe_map

    # shortcut: every character is safe, so there is nothing to encode
    if all_safe.issuperset(s):
        return s
    return ''.join(safe_map[c] for c in s)
|
||||||
|
|
||||||
|
# linked code
|
||||||
|
def _quote_plus(s, safe=''):
    """Quote `s` with `_quote()`, additionally turning each space into '+'."""
    if b' ' not in s:
        # no spaces: plain quoting is all that is needed
        return _quote(s, safe)
    # keep spaces unescaped while quoting, then substitute '+' for them
    quoted = _quote(s, safe + b' ')
    return quoted.replace(b' ', b'+')
|
||||||
|
|
||||||
|
# linked code
|
||||||
|
def _urlcleanup():
    """Clean up the module's URL opener, if set, then empty the caches.

    Clears both the quoting table cache (`_safemaps`) and the module's
    `ftpcache`.
    """
    opener = compat_urllib_parse._urlopener
    if opener:
        opener.cleanup()
    _safemaps.clear()
    compat_urllib_parse.ftpcache.clear()
|
||||||
|
|
||||||
|
for name, fix in (
|
||||||
|
('quote', _quote),
|
||||||
|
('quote_plus', _quote_plus),
|
||||||
|
('urlcleanup', _urlcleanup)):
|
||||||
|
setattr(compat_urllib_parse, '_' + name, getattr(compat_urllib_parse, name))
|
||||||
|
setattr(compat_urllib_parse, name, fix)
|
||||||
|
|
||||||
compat_urllib_parse_parse_qs = compat_parse_qs
|
compat_urllib_parse_parse_qs = compat_parse_qs
|
||||||
|
|
||||||
|
|
||||||
@ -3120,6 +3167,21 @@ else:
|
|||||||
compat_os_path_expanduser = compat_expanduser
|
compat_os_path_expanduser = compat_expanduser
|
||||||
|
|
||||||
|
|
||||||
|
# compat_os_makedirs
|
||||||
|
try:
    # probe for the `exist_ok` keyword; '.' always exists, so nothing is created
    os.makedirs('.', exist_ok=True)
    compat_os_makedirs = os.makedirs
except TypeError:  # < Py3.2
    from errno import EEXIST as _errno_EEXIST

    def compat_os_makedirs(name, mode=0o777, exist_ok=False):
        """Create directory `name` and any missing parents.

        Fallback for `os.makedirs()` implementations without `exist_ok`.
        With `exist_ok` true, an "already exists" error is ignored, but
        only when `name` actually is a directory; an existing
        non-directory at `name` still raises OSError, matching the
        Python 3 behaviour.
        """
        try:
            return os.makedirs(name, mode=mode)
        except OSError as ose:
            if not (exist_ok
                    and ose.errno == _errno_EEXIST
                    and os.path.isdir(name)):
                raise
|
||||||
|
|
||||||
|
|
||||||
# compat_os_path_realpath
|
# compat_os_path_realpath
|
||||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
# os.path.realpath on Windows does not follow symbolic links
|
# os.path.realpath on Windows does not follow symbolic links
|
||||||
@ -3637,6 +3699,7 @@ __all__ = [
|
|||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
'compat_open',
|
'compat_open',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
|
'compat_os_makedirs',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_os_path_expanduser',
|
'compat_os_path_expanduser',
|
||||||
'compat_os_path_realpath',
|
'compat_os_path_realpath',
|
||||||
|
@ -505,7 +505,7 @@ class InfoExtractor(object):
|
|||||||
if not self._x_forwarded_for_ip:
|
if not self._x_forwarded_for_ip:
|
||||||
|
|
||||||
# Geo bypass mechanism is explicitly disabled by user
|
# Geo bypass mechanism is explicitly disabled by user
|
||||||
if not self._downloader.params.get('geo_bypass', True):
|
if not self.get_param('geo_bypass', True):
|
||||||
return
|
return
|
||||||
|
|
||||||
if not geo_bypass_context:
|
if not geo_bypass_context:
|
||||||
@ -527,7 +527,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
# Explicit IP block specified by user, use it right away
|
# Explicit IP block specified by user, use it right away
|
||||||
# regardless of whether extractor is geo bypassable or not
|
# regardless of whether extractor is geo bypassable or not
|
||||||
ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
|
ip_block = self.get_param('geo_bypass_ip_block', None)
|
||||||
|
|
||||||
# Otherwise use random IP block from geo bypass context but only
|
# Otherwise use random IP block from geo bypass context but only
|
||||||
# if extractor is known as geo bypassable
|
# if extractor is known as geo bypassable
|
||||||
@ -538,8 +538,8 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
if ip_block:
|
if ip_block:
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self.get_param('verbose', False):
|
||||||
self._downloader.to_screen(
|
self.to_screen(
|
||||||
'[debug] Using fake IP %s as X-Forwarded-For.'
|
'[debug] Using fake IP %s as X-Forwarded-For.'
|
||||||
% self._x_forwarded_for_ip)
|
% self._x_forwarded_for_ip)
|
||||||
return
|
return
|
||||||
@ -548,7 +548,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
# Explicit country code specified by user, use it right away
|
# Explicit country code specified by user, use it right away
|
||||||
# regardless of whether extractor is geo bypassable or not
|
# regardless of whether extractor is geo bypassable or not
|
||||||
country = self._downloader.params.get('geo_bypass_country', None)
|
country = self.get_param('geo_bypass_country', None)
|
||||||
|
|
||||||
# Otherwise use random country code from geo bypass context but
|
# Otherwise use random country code from geo bypass context but
|
||||||
# only if extractor is known as geo bypassable
|
# only if extractor is known as geo bypassable
|
||||||
@ -559,8 +559,8 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
if country:
|
if country:
|
||||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
||||||
if self._downloader.params.get('verbose', False):
|
if self.get_param('verbose', False):
|
||||||
self._downloader.to_screen(
|
self.to_screen(
|
||||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||||
% (self._x_forwarded_for_ip, country.upper()))
|
% (self._x_forwarded_for_ip, country.upper()))
|
||||||
|
|
||||||
@ -586,9 +586,9 @@ class InfoExtractor(object):
|
|||||||
raise ExtractorError('An extractor error has occurred.', cause=e)
|
raise ExtractorError('An extractor error has occurred.', cause=e)
|
||||||
|
|
||||||
def __maybe_fake_ip_and_retry(self, countries):
|
def __maybe_fake_ip_and_retry(self, countries):
|
||||||
if (not self._downloader.params.get('geo_bypass_country', None)
|
if (not self.get_param('geo_bypass_country', None)
|
||||||
and self._GEO_BYPASS
|
and self._GEO_BYPASS
|
||||||
and self._downloader.params.get('geo_bypass', True)
|
and self.get_param('geo_bypass', True)
|
||||||
and not self._x_forwarded_for_ip
|
and not self._x_forwarded_for_ip
|
||||||
and countries):
|
and countries):
|
||||||
country_code = random.choice(countries)
|
country_code = random.choice(countries)
|
||||||
@ -698,7 +698,7 @@ class InfoExtractor(object):
|
|||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(errmsg)
|
self.report_warning(errmsg)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||||
@ -770,11 +770,11 @@ class InfoExtractor(object):
|
|||||||
webpage_bytes = prefix + webpage_bytes
|
webpage_bytes = prefix + webpage_bytes
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
|
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
|
||||||
if self._downloader.params.get('dump_intermediate_pages', False):
|
if self.get_param('dump_intermediate_pages', False):
|
||||||
self.to_screen('Dumping request to ' + urlh.geturl())
|
self.to_screen('Dumping request to ' + urlh.geturl())
|
||||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||||
self._downloader.to_screen(dump)
|
self.to_screen(dump)
|
||||||
if self._downloader.params.get('write_pages', False):
|
if self.get_param('write_pages', False):
|
||||||
basen = '%s_%s' % (video_id, urlh.geturl())
|
basen = '%s_%s' % (video_id, urlh.geturl())
|
||||||
if len(basen) > 240:
|
if len(basen) > 240:
|
||||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||||
@ -976,19 +976,9 @@ class InfoExtractor(object):
|
|||||||
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
"""Print msg to screen, prefixing it with '[ie_name]'"""
|
||||||
self._downloader.to_screen(self.__ie_msg(msg))
|
self._downloader.to_screen(self.__ie_msg(msg))
|
||||||
|
|
||||||
def write_debug(self, msg, only_once=False, _cache=[]):
|
def write_debug(self, msg, only_once=False):
|
||||||
'''Log debug message or Print message to stderr'''
|
'''Log debug message or Print message to stderr'''
|
||||||
if not self.get_param('verbose', False):
|
self._downloader.write_debug(self.__ie_msg(msg), only_once=only_once)
|
||||||
return
|
|
||||||
message = '[debug] ' + self.__ie_msg(msg)
|
|
||||||
logger = self.get_param('logger')
|
|
||||||
if logger:
|
|
||||||
logger.debug(message)
|
|
||||||
else:
|
|
||||||
if only_once and hash(message) in _cache:
|
|
||||||
return
|
|
||||||
self._downloader.to_stderr(message)
|
|
||||||
_cache.append(hash(message))
|
|
||||||
|
|
||||||
# name, default=None, *args, **kwargs
|
# name, default=None, *args, **kwargs
|
||||||
def get_param(self, name, *args, **kwargs):
|
def get_param(self, name, *args, **kwargs):
|
||||||
@ -1084,7 +1074,7 @@ class InfoExtractor(object):
|
|||||||
if mobj:
|
if mobj:
|
||||||
break
|
break
|
||||||
|
|
||||||
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||||
_name = '\033[0;34m%s\033[0m' % name
|
_name = '\033[0;34m%s\033[0m' % name
|
||||||
else:
|
else:
|
||||||
_name = name
|
_name = name
|
||||||
@ -1102,7 +1092,7 @@ class InfoExtractor(object):
|
|||||||
elif fatal:
|
elif fatal:
|
||||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
|
def _search_json(self, start_pattern, string, name, video_id, **kwargs):
|
||||||
@ -1172,7 +1162,7 @@ class InfoExtractor(object):
|
|||||||
username = None
|
username = None
|
||||||
password = None
|
password = None
|
||||||
|
|
||||||
if self._downloader.params.get('usenetrc', False):
|
if self.get_param('usenetrc', False):
|
||||||
try:
|
try:
|
||||||
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
netrc_machine = netrc_machine or self._NETRC_MACHINE
|
||||||
info = netrc.netrc().authenticators(netrc_machine)
|
info = netrc.netrc().authenticators(netrc_machine)
|
||||||
@ -1183,7 +1173,7 @@ class InfoExtractor(object):
|
|||||||
raise netrc.NetrcParseError(
|
raise netrc.NetrcParseError(
|
||||||
'No authenticators for %s' % netrc_machine)
|
'No authenticators for %s' % netrc_machine)
|
||||||
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
except (AttributeError, IOError, netrc.NetrcParseError) as err:
|
||||||
self._downloader.report_warning(
|
self.report_warning(
|
||||||
'parsing .netrc: %s' % error_to_compat_str(err))
|
'parsing .netrc: %s' % error_to_compat_str(err))
|
||||||
|
|
||||||
return username, password
|
return username, password
|
||||||
@ -1220,10 +1210,10 @@ class InfoExtractor(object):
|
|||||||
"""
|
"""
|
||||||
if self._downloader is None:
|
if self._downloader is None:
|
||||||
return None
|
return None
|
||||||
downloader_params = self._downloader.params
|
|
||||||
|
|
||||||
if downloader_params.get('twofactor') is not None:
|
twofactor = self.get_param('twofactor')
|
||||||
return downloader_params['twofactor']
|
if twofactor is not None:
|
||||||
|
return twofactor
|
||||||
|
|
||||||
return compat_getpass('Type %s and press [Return]: ' % note)
|
return compat_getpass('Type %s and press [Return]: ' % note)
|
||||||
|
|
||||||
@ -1358,7 +1348,7 @@ class InfoExtractor(object):
|
|||||||
elif fatal:
|
elif fatal:
|
||||||
raise RegexNotFoundError('Unable to extract JSON-LD')
|
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
self.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
@ -1589,7 +1579,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
if f.get('vcodec') == 'none': # audio only
|
if f.get('vcodec') == 'none': # audio only
|
||||||
preference -= 50
|
preference -= 50
|
||||||
if self._downloader.params.get('prefer_free_formats'):
|
if self.get_param('prefer_free_formats'):
|
||||||
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
|
||||||
else:
|
else:
|
||||||
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
|
ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
|
||||||
@ -1601,7 +1591,7 @@ class InfoExtractor(object):
|
|||||||
else:
|
else:
|
||||||
if f.get('acodec') == 'none': # video only
|
if f.get('acodec') == 'none': # video only
|
||||||
preference -= 40
|
preference -= 40
|
||||||
if self._downloader.params.get('prefer_free_formats'):
|
if self.get_param('prefer_free_formats'):
|
||||||
ORDER = ['flv', 'mp4', 'webm']
|
ORDER = ['flv', 'mp4', 'webm']
|
||||||
else:
|
else:
|
||||||
ORDER = ['webm', 'flv', 'mp4']
|
ORDER = ['webm', 'flv', 'mp4']
|
||||||
@ -1667,7 +1657,7 @@ class InfoExtractor(object):
|
|||||||
""" Either "http:" or "https:", depending on the user's preferences """
|
""" Either "http:" or "https:", depending on the user's preferences """
|
||||||
return (
|
return (
|
||||||
'http:'
|
'http:'
|
||||||
if self._downloader.params.get('prefer_insecure', False)
|
if self.get_param('prefer_insecure', False)
|
||||||
else 'https:')
|
else 'https:')
|
||||||
|
|
||||||
def _proto_relative_url(self, url, scheme=None):
|
def _proto_relative_url(self, url, scheme=None):
|
||||||
@ -3199,7 +3189,7 @@ class InfoExtractor(object):
|
|||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(msg)
|
self.report_warning(msg)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _float(self, v, name, fatal=False, **kwargs):
|
def _float(self, v, name, fatal=False, **kwargs):
|
||||||
@ -3209,7 +3199,7 @@ class InfoExtractor(object):
|
|||||||
if fatal:
|
if fatal:
|
||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning(msg)
|
self.report_warning(msg)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||||
@ -3218,12 +3208,12 @@ class InfoExtractor(object):
|
|||||||
0, name, value, port, port is not None, domain, True,
|
0, name, value, port, port is not None, domain, True,
|
||||||
domain.startswith('.'), path, True, secure, expire_time,
|
domain.startswith('.'), path, True, secure, expire_time,
|
||||||
discard, None, None, rest)
|
discard, None, None, rest)
|
||||||
self._downloader.cookiejar.set_cookie(cookie)
|
self.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||||
req = sanitized_Request(url)
|
req = sanitized_Request(url)
|
||||||
self._downloader.cookiejar.add_cookie_header(req)
|
self.cookiejar.add_cookie_header(req)
|
||||||
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||||
|
|
||||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||||
@ -3283,8 +3273,8 @@ class InfoExtractor(object):
|
|||||||
return not any_restricted
|
return not any_restricted
|
||||||
|
|
||||||
def extract_subtitles(self, *args, **kwargs):
|
def extract_subtitles(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writesubtitles', False)
|
if (self.get_param('writesubtitles', False)
|
||||||
or self._downloader.params.get('listsubtitles')):
|
or self.get_param('listsubtitles')):
|
||||||
return self._get_subtitles(*args, **kwargs)
|
return self._get_subtitles(*args, **kwargs)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@ -3305,7 +3295,11 @@ class InfoExtractor(object):
|
|||||||
""" Merge subtitle dictionaries, language by language. """
|
""" Merge subtitle dictionaries, language by language. """
|
||||||
|
|
||||||
# ..., * , target=None
|
# ..., * , target=None
|
||||||
target = kwargs.get('target') or dict(subtitle_dict1)
|
target = kwargs.get('target')
|
||||||
|
if target is None:
|
||||||
|
target = dict(subtitle_dict1)
|
||||||
|
else:
|
||||||
|
subtitle_dicts = (subtitle_dict1,) + subtitle_dicts
|
||||||
|
|
||||||
for subtitle_dict in subtitle_dicts:
|
for subtitle_dict in subtitle_dicts:
|
||||||
for lang in subtitle_dict:
|
for lang in subtitle_dict:
|
||||||
@ -3313,8 +3307,8 @@ class InfoExtractor(object):
|
|||||||
return target
|
return target
|
||||||
|
|
||||||
def extract_automatic_captions(self, *args, **kwargs):
|
def extract_automatic_captions(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('writeautomaticsub', False)
|
if (self.get_param('writeautomaticsub', False)
|
||||||
or self._downloader.params.get('listsubtitles')):
|
or self.get_param('listsubtitles')):
|
||||||
return self._get_automatic_captions(*args, **kwargs)
|
return self._get_automatic_captions(*args, **kwargs)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@ -3322,9 +3316,9 @@ class InfoExtractor(object):
|
|||||||
raise NotImplementedError('This method must be implemented by subclasses')
|
raise NotImplementedError('This method must be implemented by subclasses')
|
||||||
|
|
||||||
def mark_watched(self, *args, **kwargs):
|
def mark_watched(self, *args, **kwargs):
|
||||||
if (self._downloader.params.get('mark_watched', False)
|
if (self.get_param('mark_watched', False)
|
||||||
and (self._get_login_info()[0] is not None
|
and (self._get_login_info()[0] is not None
|
||||||
or self._downloader.params.get('cookiefile') is not None)):
|
or self.get_param('cookiefile') is not None)):
|
||||||
self._mark_watched(*args, **kwargs)
|
self._mark_watched(*args, **kwargs)
|
||||||
|
|
||||||
def _mark_watched(self, *args, **kwargs):
|
def _mark_watched(self, *args, **kwargs):
|
||||||
@ -3332,7 +3326,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def geo_verification_headers(self):
|
def geo_verification_headers(self):
|
||||||
headers = {}
|
headers = {}
|
||||||
geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
|
geo_verification_proxy = self.get_param('geo_verification_proxy')
|
||||||
if geo_verification_proxy:
|
if geo_verification_proxy:
|
||||||
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
headers['Ytdl-request-proxy'] = geo_verification_proxy
|
||||||
return headers
|
return headers
|
||||||
|
@ -1263,6 +1263,7 @@ from .tele13 import Tele13IE
|
|||||||
from .telebruxelles import TeleBruxellesIE
|
from .telebruxelles import TeleBruxellesIE
|
||||||
from .telecinco import TelecincoIE
|
from .telecinco import TelecincoIE
|
||||||
from .telegraaf import TelegraafIE
|
from .telegraaf import TelegraafIE
|
||||||
|
from .telegram import TelegramIE
|
||||||
from .telemb import TeleMBIE
|
from .telemb import TeleMBIE
|
||||||
from .telequebec import (
|
from .telequebec import (
|
||||||
TeleQuebecIE,
|
TeleQuebecIE,
|
||||||
|
60
youtube_dl/extractor/telegram.py
Normal file
60
youtube_dl/extractor/telegram.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TelegramIE(InfoExtractor):
    """Extractor for the video attached to a t.me channel post."""

    _VALID_URL = r'https://t\.me/(?P<user>[^/]+)/(?P<id>\d+)'
    _TEST = {
        'url': 'https://t.me/telegram/195',
        'info_dict': {
            'id': '195',
            'ext': 'mp4',
            'title': 'telegram',
            'description': 'Telegram’s Bot Documentation has been completely overhauled –\xa0adding the latest info, along with detailed screenshots and videos.\n\nNewcomers now have an easy way to learn about all the powerful features, and can build a bot from our step-by-step tutorial with examples for popular programming languages.\n\nExperienced developers can explore recent updates and advanced features, ready for 2022 and beyond.',
            'duration': 23,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the post at `url`.

        The channel name (used as the title) and the post number (used as
        the id) are taken from the URL; the video URL, duration and
        description are scraped from the post's embed page.
        """
        # One match against _VALID_URL yields both parts, instead of a
        # second hand-written pattern that could drift out of sync
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Unable to find channel name')
        title, video_id = mobj.group('user', 'id')

        # Rebuild the page URL from the matched parts: appending the
        # parameters to the raw input would give a malformed URL whenever
        # it already carries a query string or fragment
        embed_url = 'https://t.me/%s/%s?embed=1&mode=tme' % (title, video_id)
        html = self._download_webpage(embed_url, video_id)

        video_url = self._search_regex(r'<video src="([^"]+)"', html, 'video_url')
        formats = [{'url': video_url}]

        # Shown as colon-separated fields (m:s, or h:m:s for long videos)
        duration = None
        duration_text = self._search_regex(
            r'<time class="message_video_duration.*?>(\d+(?::\d+)+)<', html,
            'duration', fatal=False)
        if duration_text:
            fields = [int_or_none(f) for f in duration_text.split(':')]
            if None not in fields:
                duration = 0
                for field in fields:
                    duration = duration * 60 + field

        description = self._html_search_regex(
            r'<div class="tgme_widget_message_text.*?>(.+?)</div>', html,
            'description', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }
|
@ -342,14 +342,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
if not self._login():
|
if not self._login():
|
||||||
return
|
return
|
||||||
|
|
||||||
_DEFAULT_API_DATA = {
|
_DEFAULT_API_DATA = {'context': _INNERTUBE_CLIENTS['web']['INNERTUBE_CONTEXT']}
|
||||||
'context': {
|
|
||||||
'client': {
|
|
||||||
'clientName': 'WEB',
|
|
||||||
'clientVersion': '2.20201021.03.00',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||||
@ -497,11 +490,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
data['params'] = params
|
data['params'] = params
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
search = self._download_json(
|
search = self._download_json(
|
||||||
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
'https://www.youtube.com/youtubei/v1/search',
|
||||||
video_id='query "%s"' % query,
|
video_id='query "%s"' % query,
|
||||||
note='Downloading page %s' % page_num,
|
note='Downloading page %s' % page_num,
|
||||||
errnote='Unable to download API page', fatal=False,
|
errnote='Unable to download API page', fatal=False,
|
||||||
data=json.dumps(data).encode('utf8'),
|
data=json.dumps(data).encode('utf8'),
|
||||||
|
query={
|
||||||
|
# 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||||
|
'prettyPrint': 'false',
|
||||||
|
},
|
||||||
headers={'content-type': 'application/json'})
|
headers={'content-type': 'application/json'})
|
||||||
if not search:
|
if not search:
|
||||||
break
|
break
|
||||||
@ -1655,7 +1652,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
assert os.path.basename(func_id) == func_id
|
assert os.path.basename(func_id) == func_id
|
||||||
|
|
||||||
self.write_debug('Extracting signature function {0}'.format(func_id))
|
self.write_debug('Extracting signature function {0}'.format(func_id))
|
||||||
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
|
cache_spec, code = self.cache.load('youtube-sigfuncs', func_id, min_ver='2025.04.07'), None
|
||||||
|
|
||||||
if not cache_spec:
|
if not cache_spec:
|
||||||
code = self._load_player(video_id, player_url, player_id)
|
code = self._load_player(video_id, player_url, player_id)
|
||||||
@ -1816,6 +1813,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
return ret
|
return ret
|
||||||
|
|
||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
|
func_name, idx = None, None
|
||||||
|
# these special cases are redundant and probably obsolete (2025-04):
|
||||||
|
# they make the tests run ~10% faster without fallback warnings
|
||||||
|
r"""
|
||||||
func_name, idx = self._search_regex(
|
func_name, idx = self._search_regex(
|
||||||
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
|
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
|
||||||
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
# (R="nn"[+J.Z],mW(J),N=J.K[R]||null)&&(N=narray[idx](N),J.set(R,N))}};
|
||||||
@ -1842,9 +1843,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
\(\s*[\w$]+\s*\)
|
\(\s*[\w$]+\s*\)
|
||||||
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
|
||||||
default=(None, None))
|
default=(None, None))
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not func_name:
|
||||||
|
# nfunc=function(x){...}|function nfunc(x); ...
|
||||||
|
# ... var y=[nfunc]|y[idx]=nfunc);
|
||||||
|
# obvious REs hang, so use a two-stage tactic
|
||||||
|
for m in re.finditer(r'''(?x)
|
||||||
|
[\n;]var\s(?:(?:(?!,).)+,|\s)*?(?!\d)[\w$]+(?:\[(?P<idx>\d+)\])?\s*=\s*
|
||||||
|
(?(idx)|\[\s*)(?P<nfunc>(?!\d)[\w$]+)(?(idx)|\s*\])
|
||||||
|
\s*?[;\n]
|
||||||
|
''', jscode):
|
||||||
|
func_name = self._search_regex(
|
||||||
|
r'[;,]\s*(function\s+)?({0})(?(1)|\s*=\s*function)\s*\((?!\d)[\w$]+\)\s*\{1}(?!\s*return\s)'.format(
|
||||||
|
re.escape(m.group('nfunc')), '{'),
|
||||||
|
jscode, 'Initial JS player n function name (2)', group=2, default=None)
|
||||||
|
if func_name:
|
||||||
|
idx = m.group('idx')
|
||||||
|
break
|
||||||
|
|
||||||
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
# thx bashonly: yt-dlp/yt-dlp/pull/10611
|
||||||
if not func_name:
|
if not func_name:
|
||||||
self.report_warning('Falling back to generic n function search')
|
self.report_warning('Falling back to generic n function search', only_once=True)
|
||||||
return self._search_regex(
|
return self._search_regex(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
|
||||||
@ -1858,14 +1878,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
return func_name
|
return func_name
|
||||||
|
|
||||||
return self._search_json(
|
return self._search_json(
|
||||||
r'var\s+{0}\s*='.format(re.escape(func_name)), jscode,
|
r'(?<![\w-])var\s(?:(?:(?!,).)+,|\s)*?{0}\s*='.format(re.escape(func_name)), jscode,
|
||||||
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
'Initial JS player n function list ({0}.{1})'.format(func_name, idx),
|
||||||
func_name, contains_pattern=r'\[[\s\S]+\]', end_pattern='[,;]',
|
func_name, contains_pattern=r'\[.+\]', end_pattern='[,;]',
|
||||||
transform_source=js_to_json)[int(idx)]
|
transform_source=js_to_json)[int(idx)]
|
||||||
|
|
||||||
def _extract_n_function_code(self, video_id, player_url):
|
def _extract_n_function_code(self, video_id, player_url):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
func_code = self.cache.load('youtube-nsig', player_id)
|
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.04.07')
|
||||||
jscode = func_code or self._load_player(video_id, player_url)
|
jscode = func_code or self._load_player(video_id, player_url)
|
||||||
jsi = JSInterpreter(jscode)
|
jsi = JSInterpreter(jscode)
|
||||||
|
|
||||||
@ -3339,6 +3359,20 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'thumbnailViewModel', 'image'), final_key='sources'),
|
'thumbnailViewModel', 'image'), final_key='sources'),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def _extract_shorts_lockup_view_model(self, view_model):
    """Build a url_result entry from a shortsLockupViewModel dict.

    Returns None when `view_model` has no reel video id that YoutubeIE
    accepts; otherwise the entry is merged with the title and thumbnails
    found in the view model.
    """
    video_id_path = (
        'onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId',
        T(lambda v: v if YoutubeIE.suitable(v) else None))
    content_id = traverse_obj(view_model, video_id_path)
    if not content_id:
        return None

    entry = self.url_result(
        content_id, ie=YoutubeIE.ie_key(), video_id=content_id)
    title_path = ('overlayMetadata', 'primaryText', 'content', T(compat_str))
    metadata = {
        'title': traverse_obj(view_model, title_path),
        'thumbnails': self._extract_thumbnails(
            view_model, 'thumbnail', final_key='sources'),
    }
    return merge_dicts(entry, metadata)
|
||||||
|
|
||||||
def _video_entry(self, video_renderer):
|
def _video_entry(self, video_renderer):
|
||||||
video_id = video_renderer.get('videoId')
|
video_id = video_renderer.get('videoId')
|
||||||
if video_id:
|
if video_id:
|
||||||
@ -3385,10 +3419,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
def _rich_grid_entries(self, contents):
|
def _rich_grid_entries(self, contents):
|
||||||
for content in contents:
|
for content in traverse_obj(
|
||||||
content = traverse_obj(
|
contents, (Ellipsis, 'richItemRenderer', 'content'),
|
||||||
content, ('richItemRenderer', 'content'),
|
expected_type=dict):
|
||||||
expected_type=dict) or {}
|
|
||||||
video_renderer = traverse_obj(
|
video_renderer = traverse_obj(
|
||||||
content, 'videoRenderer', 'reelItemRenderer',
|
content, 'videoRenderer', 'reelItemRenderer',
|
||||||
expected_type=dict)
|
expected_type=dict)
|
||||||
@ -3396,6 +3429,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
entry = self._video_entry(video_renderer)
|
entry = self._video_entry(video_renderer)
|
||||||
if entry:
|
if entry:
|
||||||
yield entry
|
yield entry
|
||||||
|
# shorts item
|
||||||
|
shorts_lockup_view_model = content.get('shortsLockupViewModel')
|
||||||
|
if shorts_lockup_view_model:
|
||||||
|
entry = self._extract_shorts_lockup_view_model(shorts_lockup_view_model)
|
||||||
|
if entry:
|
||||||
|
yield entry
|
||||||
# playlist
|
# playlist
|
||||||
renderer = traverse_obj(
|
renderer = traverse_obj(
|
||||||
content, 'playlistRenderer', expected_type=dict) or {}
|
content, 'playlistRenderer', expected_type=dict) or {}
|
||||||
@ -3434,23 +3473,15 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
next_continuation = cls._extract_next_continuation_data(renderer)
|
next_continuation = cls._extract_next_continuation_data(renderer)
|
||||||
if next_continuation:
|
if next_continuation:
|
||||||
return next_continuation
|
return next_continuation
|
||||||
contents = []
|
for command in traverse_obj(renderer, (
|
||||||
for key in ('contents', 'items'):
|
('contents', 'items', 'rows'), Ellipsis, 'continuationItemRenderer',
|
||||||
contents.extend(try_get(renderer, lambda x: x[key], list) or [])
|
('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
|
||||||
for content in contents:
|
(('commandExecutorCommand', 'commands', Ellipsis), None), T(dict))):
|
||||||
if not isinstance(content, dict):
|
continuation = traverse_obj(command, ('continuationCommand', 'token', T(compat_str)))
|
||||||
continue
|
|
||||||
continuation_ep = try_get(
|
|
||||||
content, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
|
|
||||||
dict)
|
|
||||||
if not continuation_ep:
|
|
||||||
continue
|
|
||||||
continuation = try_get(
|
|
||||||
continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
|
|
||||||
if not continuation:
|
if not continuation:
|
||||||
continue
|
continue
|
||||||
ctp = continuation_ep.get('clickTrackingParams')
|
ctp = command.get('clickTrackingParams')
|
||||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
return cls._build_continuation_query(continuation, ctp)
|
||||||
|
|
||||||
def _entries(self, tab, item_id, webpage):
|
def _entries(self, tab, item_id, webpage):
|
||||||
tab_content = try_get(tab, lambda x: x['content'], dict)
|
tab_content = try_get(tab, lambda x: x['content'], dict)
|
||||||
@ -3499,6 +3530,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
entry = self._video_entry(renderer)
|
entry = self._video_entry(renderer)
|
||||||
if entry:
|
if entry:
|
||||||
yield entry
|
yield entry
|
||||||
|
renderer = isr_content.get('richGridRenderer')
|
||||||
|
if renderer:
|
||||||
|
for from_ in self._rich_grid_entries(
|
||||||
|
traverse_obj(renderer, ('contents', Ellipsis, T(dict)))):
|
||||||
|
yield from_
|
||||||
|
continuation = self._extract_continuation(renderer)
|
||||||
|
continue
|
||||||
|
|
||||||
if not continuation:
|
if not continuation:
|
||||||
continuation = self._extract_continuation(is_renderer)
|
continuation = self._extract_continuation(is_renderer)
|
||||||
@ -3508,8 +3546,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
rich_grid_renderer = tab_content.get('richGridRenderer')
|
rich_grid_renderer = tab_content.get('richGridRenderer')
|
||||||
if not rich_grid_renderer:
|
if not rich_grid_renderer:
|
||||||
return
|
return
|
||||||
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
for from_ in self._rich_grid_entries(
|
||||||
yield entry
|
traverse_obj(rich_grid_renderer, ('contents', Ellipsis, T(dict)))):
|
||||||
|
yield from_
|
||||||
|
|
||||||
continuation = self._extract_continuation(rich_grid_renderer)
|
continuation = self._extract_continuation(rich_grid_renderer)
|
||||||
|
|
||||||
@ -3555,8 +3594,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
# Downloading page may result in intermittent 5xx HTTP error
|
# Downloading page may result in intermittent 5xx HTTP error
|
||||||
# that is usually worked around with a retry
|
# that is usually worked around with a retry
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
'https://www.youtube.com/youtubei/v1/browse',
|
||||||
None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
|
None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''),
|
||||||
|
query={
|
||||||
|
# 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
|
||||||
|
'prettyPrint': 'false',
|
||||||
|
},
|
||||||
headers=headers, data=json.dumps(data).encode('utf8'))
|
headers=headers, data=json.dumps(data).encode('utf8'))
|
||||||
break
|
break
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -240,7 +240,7 @@ def _js_ternary(cndn, if_true=True, if_false=False):
|
|||||||
def _js_unary_op(op):
|
def _js_unary_op(op):
|
||||||
|
|
||||||
@wraps_op(op)
|
@wraps_op(op)
|
||||||
def wrapped(_, a):
|
def wrapped(a, _):
|
||||||
return op(a)
|
return op(a)
|
||||||
|
|
||||||
return wrapped
|
return wrapped
|
||||||
@ -283,17 +283,6 @@ _OPERATORS = (
|
|||||||
('**', _js_exp),
|
('**', _js_exp),
|
||||||
)
|
)
|
||||||
|
|
||||||
_COMP_OPERATORS = (
|
|
||||||
('===', _js_id_op(operator.is_)),
|
|
||||||
('!==', _js_id_op(operator.is_not)),
|
|
||||||
('==', _js_eq),
|
|
||||||
('!=', _js_neq),
|
|
||||||
('<=', _js_comp_op(operator.le)),
|
|
||||||
('>=', _js_comp_op(operator.ge)),
|
|
||||||
('<', _js_comp_op(operator.lt)),
|
|
||||||
('>', _js_comp_op(operator.gt)),
|
|
||||||
)
|
|
||||||
|
|
||||||
_LOG_OPERATORS = (
|
_LOG_OPERATORS = (
|
||||||
('|', _js_bit_op(operator.or_)),
|
('|', _js_bit_op(operator.or_)),
|
||||||
('^', _js_bit_op(operator.xor)),
|
('^', _js_bit_op(operator.xor)),
|
||||||
@ -310,13 +299,27 @@ _SC_OPERATORS = (
|
|||||||
_UNARY_OPERATORS_X = (
|
_UNARY_OPERATORS_X = (
|
||||||
('void', _js_unary_op(lambda _: JS_Undefined)),
|
('void', _js_unary_op(lambda _: JS_Undefined)),
|
||||||
('typeof', _js_unary_op(_js_typeof)),
|
('typeof', _js_unary_op(_js_typeof)),
|
||||||
|
# avoid functools.partial here since Py2 update_wrapper(partial) -> no __module__
|
||||||
|
('!', _js_unary_op(lambda x: _js_ternary(x, if_true=False, if_false=True))),
|
||||||
)
|
)
|
||||||
|
|
||||||
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS))
|
_COMP_OPERATORS = (
|
||||||
|
('===', _js_id_op(operator.is_)),
|
||||||
|
('!==', _js_id_op(operator.is_not)),
|
||||||
|
('==', _js_eq),
|
||||||
|
('!=', _js_neq),
|
||||||
|
('<=', _js_comp_op(operator.le)),
|
||||||
|
('>=', _js_comp_op(operator.ge)),
|
||||||
|
('<', _js_comp_op(operator.lt)),
|
||||||
|
('>', _js_comp_op(operator.gt)),
|
||||||
|
)
|
||||||
|
|
||||||
|
_OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS + _SC_OPERATORS))
|
||||||
|
|
||||||
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
_NAME_RE = r'[a-zA-Z_$][\w$]*'
|
||||||
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
|
||||||
_QUOTES = '\'"/'
|
_QUOTES = '\'"/'
|
||||||
|
_NESTED_BRACKETS = r'[^[\]]+(?:\[[^[\]]+(?:\[[^\]]+\])?\])?'
|
||||||
|
|
||||||
|
|
||||||
class JS_Break(ExtractorError):
|
class JS_Break(ExtractorError):
|
||||||
@ -353,7 +356,7 @@ class LocalNameSpace(ChainMap):
|
|||||||
raise NotImplementedError('Deleting is not supported')
|
raise NotImplementedError('Deleting is not supported')
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'LocalNameSpace%s' % (self.maps, )
|
return 'LocalNameSpace({0!r})'.format(self.maps)
|
||||||
|
|
||||||
|
|
||||||
class Debugger(object):
|
class Debugger(object):
|
||||||
@ -374,6 +377,9 @@ class Debugger(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def wrap_interpreter(cls, f):
|
def wrap_interpreter(cls, f):
|
||||||
|
if not cls.ENABLED:
|
||||||
|
return f
|
||||||
|
|
||||||
@wraps(f)
|
@wraps(f)
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
|
||||||
if cls.ENABLED and stmt.strip():
|
if cls.ENABLED and stmt.strip():
|
||||||
@ -414,7 +420,17 @@ class JSInterpreter(object):
|
|||||||
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
||||||
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
||||||
|
|
||||||
class JS_RegExp(object):
|
class JS_Object(object):
|
||||||
|
def __getitem__(self, key):
|
||||||
|
if hasattr(self, key):
|
||||||
|
return getattr(self, key)
|
||||||
|
raise KeyError(key)
|
||||||
|
|
||||||
|
def dump(self):
|
||||||
|
"""Serialise the instance"""
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
class JS_RegExp(JS_Object):
|
||||||
RE_FLAGS = {
|
RE_FLAGS = {
|
||||||
# special knowledge: Python's re flags are bitmask values, current max 128
|
# special knowledge: Python's re flags are bitmask values, current max 128
|
||||||
# invent new bitmask values well above that for literal parsing
|
# invent new bitmask values well above that for literal parsing
|
||||||
@ -435,16 +451,24 @@ class JSInterpreter(object):
|
|||||||
def __init__(self, pattern_txt, flags=0):
|
def __init__(self, pattern_txt, flags=0):
|
||||||
if isinstance(flags, compat_str):
|
if isinstance(flags, compat_str):
|
||||||
flags, _ = self.regex_flags(flags)
|
flags, _ = self.regex_flags(flags)
|
||||||
# First, avoid https://github.com/python/cpython/issues/74534
|
|
||||||
self.__self = None
|
self.__self = None
|
||||||
pattern_txt = str_or_none(pattern_txt) or '(?:)'
|
pattern_txt = str_or_none(pattern_txt) or '(?:)'
|
||||||
self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
|
# escape unintended embedded flags
|
||||||
|
pattern_txt = re.sub(
|
||||||
|
r'(\(\?)([aiLmsux]*)(-[imsx]+:|(?<!\?)\))',
|
||||||
|
lambda m: ''.join(
|
||||||
|
(re.escape(m.group(1)), m.group(2), re.escape(m.group(3)))
|
||||||
|
if m.group(3) == ')'
|
||||||
|
else ('(?:', m.group(2), m.group(3))),
|
||||||
|
pattern_txt)
|
||||||
|
# Avoid https://github.com/python/cpython/issues/74534
|
||||||
|
self.source = pattern_txt.replace('[[', r'[\[')
|
||||||
self.__flags = flags
|
self.__flags = flags
|
||||||
|
|
||||||
def __instantiate(self):
|
def __instantiate(self):
|
||||||
if self.__self:
|
if self.__self:
|
||||||
return
|
return
|
||||||
self.__self = re.compile(self.__pattern_txt, self.__flags)
|
self.__self = re.compile(self.source, self.__flags)
|
||||||
# Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
|
# Thx: https://stackoverflow.com/questions/44773522/setattr-on-python2-sre-sre-pattern
|
||||||
for name in dir(self.__self):
|
for name in dir(self.__self):
|
||||||
# Only these? Obviously __class__, __init__.
|
# Only these? Obviously __class__, __init__.
|
||||||
@ -452,16 +476,15 @@ class JSInterpreter(object):
|
|||||||
# that can't be setattr'd but also can't need to be copied.
|
# that can't be setattr'd but also can't need to be copied.
|
||||||
if name in ('__class__', '__init__', '__weakref__'):
|
if name in ('__class__', '__init__', '__weakref__'):
|
||||||
continue
|
continue
|
||||||
setattr(self, name, getattr(self.__self, name))
|
if name == 'flags':
|
||||||
|
setattr(self, name, getattr(self.__self, name, self.__flags))
|
||||||
|
else:
|
||||||
|
setattr(self, name, getattr(self.__self, name))
|
||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
self.__instantiate()
|
self.__instantiate()
|
||||||
# make Py 2.6 conform to its lying documentation
|
if name == 'pattern':
|
||||||
if name == 'flags':
|
self.pattern = self.source
|
||||||
self.flags = self.__flags
|
|
||||||
return self.flags
|
|
||||||
elif name == 'pattern':
|
|
||||||
self.pattern = self.__pattern_txt
|
|
||||||
return self.pattern
|
return self.pattern
|
||||||
elif hasattr(self.__self, name):
|
elif hasattr(self.__self, name):
|
||||||
v = getattr(self.__self, name)
|
v = getattr(self.__self, name)
|
||||||
@ -469,6 +492,26 @@ class JSInterpreter(object):
|
|||||||
return v
|
return v
|
||||||
elif name in ('groupindex', 'groups'):
|
elif name in ('groupindex', 'groups'):
|
||||||
return 0 if name == 'groupindex' else {}
|
return 0 if name == 'groupindex' else {}
|
||||||
|
else:
|
||||||
|
flag_attrs = ( # order by 2nd elt
|
||||||
|
('hasIndices', 'd'),
|
||||||
|
('global', 'g'),
|
||||||
|
('ignoreCase', 'i'),
|
||||||
|
('multiline', 'm'),
|
||||||
|
('dotAll', 's'),
|
||||||
|
('unicode', 'u'),
|
||||||
|
('unicodeSets', 'v'),
|
||||||
|
('sticky', 'y'),
|
||||||
|
)
|
||||||
|
for k, c in flag_attrs:
|
||||||
|
if name == k:
|
||||||
|
return bool(self.RE_FLAGS[c] & self.__flags)
|
||||||
|
else:
|
||||||
|
if name == 'flags':
|
||||||
|
return ''.join(
|
||||||
|
(c if self.RE_FLAGS[c] & self.__flags else '')
|
||||||
|
for _, c in flag_attrs)
|
||||||
|
|
||||||
raise AttributeError('{0} has no attribute named {1}'.format(self, name))
|
raise AttributeError('{0} has no attribute named {1}'.format(self, name))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -482,7 +525,16 @@ class JSInterpreter(object):
|
|||||||
flags |= cls.RE_FLAGS[ch]
|
flags |= cls.RE_FLAGS[ch]
|
||||||
return flags, expr[idx + 1:]
|
return flags, expr[idx + 1:]
|
||||||
|
|
||||||
class JS_Date(object):
|
def dump(self):
|
||||||
|
return '(/{0}/{1})'.format(
|
||||||
|
re.sub(r'(?<!\\)/', r'\/', self.source),
|
||||||
|
self.flags)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def escape(string_):
|
||||||
|
return re.escape(string_)
|
||||||
|
|
||||||
|
class JS_Date(JS_Object):
|
||||||
_t = None
|
_t = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -549,6 +601,9 @@ class JSInterpreter(object):
|
|||||||
def valueOf(self):
|
def valueOf(self):
|
||||||
return _NaN if self._t is None else self._t
|
return _NaN if self._t is None else self._t
|
||||||
|
|
||||||
|
def dump(self):
|
||||||
|
return '(new Date({0}))'.format(self.toString())
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __op_chars(cls):
|
def __op_chars(cls):
|
||||||
op_chars = set(';,[')
|
op_chars = set(';,[')
|
||||||
@ -652,6 +707,68 @@ class JSInterpreter(object):
|
|||||||
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
|
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS, _UNARY_OPERATORS_X))
|
||||||
return _cached
|
return _cached
|
||||||
|
|
||||||
|
def _separate_at_op(self, expr, max_split=None):
|
||||||
|
|
||||||
|
for op, _ in self._all_operators():
|
||||||
|
# hackety: </> have higher priority than <</>>, but don't confuse them
|
||||||
|
skip_delim = (op + op) if op in '<>*?' else None
|
||||||
|
if op == '?':
|
||||||
|
skip_delim = (skip_delim, '?.')
|
||||||
|
separated = list(self._separate(expr, op, skip_delims=skip_delim))
|
||||||
|
if len(separated) < 2:
|
||||||
|
continue
|
||||||
|
|
||||||
|
right_expr = separated.pop()
|
||||||
|
# handle operators that are both unary and binary, minimal BODMAS
|
||||||
|
if op in ('+', '-'):
|
||||||
|
# simplify/adjust consecutive instances of these operators
|
||||||
|
undone = 0
|
||||||
|
separated = [s.strip() for s in separated]
|
||||||
|
while len(separated) > 1 and not separated[-1]:
|
||||||
|
undone += 1
|
||||||
|
separated.pop()
|
||||||
|
if op == '-' and undone % 2 != 0:
|
||||||
|
right_expr = op + right_expr
|
||||||
|
elif op == '+':
|
||||||
|
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
if separated[-1][-1:] in self.OP_CHARS:
|
||||||
|
right_expr = separated.pop() + right_expr
|
||||||
|
# hanging op at end of left => unary + (strip) or - (push right)
|
||||||
|
separated.append(right_expr)
|
||||||
|
dm_ops = ('*', '%', '/', '**')
|
||||||
|
dm_chars = set(''.join(dm_ops))
|
||||||
|
|
||||||
|
def yield_terms(s):
|
||||||
|
skip = False
|
||||||
|
for i, term in enumerate(s[:-1]):
|
||||||
|
if skip:
|
||||||
|
skip = False
|
||||||
|
continue
|
||||||
|
if not (dm_chars & set(term)):
|
||||||
|
yield term
|
||||||
|
continue
|
||||||
|
for dm_op in dm_ops:
|
||||||
|
bodmas = list(self._separate(term, dm_op, skip_delims=skip_delim))
|
||||||
|
if len(bodmas) > 1 and not bodmas[-1].strip():
|
||||||
|
bodmas[-1] = (op if op == '-' else '') + s[i + 1]
|
||||||
|
yield dm_op.join(bodmas)
|
||||||
|
skip = True
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
if term:
|
||||||
|
yield term
|
||||||
|
|
||||||
|
if not skip and s[-1]:
|
||||||
|
yield s[-1]
|
||||||
|
|
||||||
|
separated = list(yield_terms(separated))
|
||||||
|
right_expr = separated.pop() if len(separated) > 1 else None
|
||||||
|
expr = op.join(separated)
|
||||||
|
if right_expr is None:
|
||||||
|
continue
|
||||||
|
return op, separated, right_expr
|
||||||
|
|
||||||
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
|
||||||
if op in ('||', '&&'):
|
if op in ('||', '&&'):
|
||||||
if (op == '&&') ^ _js_ternary(left_val):
|
if (op == '&&') ^ _js_ternary(left_val):
|
||||||
@ -662,7 +779,7 @@ class JSInterpreter(object):
|
|||||||
elif op == '?':
|
elif op == '?':
|
||||||
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
|
right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))
|
||||||
|
|
||||||
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
|
right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) if right_expr else left_val
|
||||||
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
|
opfunc = op and next((v for k, v in self._all_operators() if k == op), None)
|
||||||
if not opfunc:
|
if not opfunc:
|
||||||
return right_val
|
return right_val
|
||||||
@ -707,51 +824,9 @@ class JSInterpreter(object):
|
|||||||
_FINALLY_RE = re.compile(r'finally\s*\{')
|
_FINALLY_RE = re.compile(r'finally\s*\{')
|
||||||
_SWITCH_RE = re.compile(r'switch\s*\(')
|
_SWITCH_RE = re.compile(r'switch\s*\(')
|
||||||
|
|
||||||
def handle_operators(self, expr, local_vars, allow_recursion):
|
def _eval_operator(self, op, left_expr, right_expr, expr, local_vars, allow_recursion):
|
||||||
|
left_val = self.interpret_expression(left_expr, local_vars, allow_recursion)
|
||||||
for op, _ in self._all_operators():
|
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion)
|
||||||
# hackety: </> have higher priority than <</>>, but don't confuse them
|
|
||||||
skip_delim = (op + op) if op in '<>*?' else None
|
|
||||||
if op == '?':
|
|
||||||
skip_delim = (skip_delim, '?.')
|
|
||||||
separated = list(self._separate(expr, op, skip_delims=skip_delim))
|
|
||||||
if len(separated) < 2:
|
|
||||||
continue
|
|
||||||
|
|
||||||
right_expr = separated.pop()
|
|
||||||
# handle operators that are both unary and binary, minimal BODMAS
|
|
||||||
if op in ('+', '-'):
|
|
||||||
# simplify/adjust consecutive instances of these operators
|
|
||||||
undone = 0
|
|
||||||
separated = [s.strip() for s in separated]
|
|
||||||
while len(separated) > 1 and not separated[-1]:
|
|
||||||
undone += 1
|
|
||||||
separated.pop()
|
|
||||||
if op == '-' and undone % 2 != 0:
|
|
||||||
right_expr = op + right_expr
|
|
||||||
elif op == '+':
|
|
||||||
while len(separated) > 1 and set(separated[-1]) <= self.OP_CHARS:
|
|
||||||
right_expr = separated.pop() + right_expr
|
|
||||||
if separated[-1][-1:] in self.OP_CHARS:
|
|
||||||
right_expr = separated.pop() + right_expr
|
|
||||||
# hanging op at end of left => unary + (strip) or - (push right)
|
|
||||||
left_val = separated[-1] if separated else ''
|
|
||||||
for dm_op in ('*', '%', '/', '**'):
|
|
||||||
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
|
|
||||||
if len(bodmas) > 1 and not bodmas[-1].strip():
|
|
||||||
expr = op.join(separated) + op + right_expr
|
|
||||||
if len(separated) > 1:
|
|
||||||
separated.pop()
|
|
||||||
right_expr = op.join((left_val, right_expr))
|
|
||||||
else:
|
|
||||||
separated = [op.join((left_val, right_expr))]
|
|
||||||
right_expr = None
|
|
||||||
break
|
|
||||||
if right_expr is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
|
|
||||||
return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), True
|
|
||||||
|
|
||||||
@Debugger.wrap_interpreter
|
@Debugger.wrap_interpreter
|
||||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||||
@ -807,15 +882,19 @@ class JSInterpreter(object):
|
|||||||
else:
|
else:
|
||||||
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
|
raise self.Exception('Unsupported object {obj:.100}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
|
# apply unary operators (see new above)
|
||||||
for op, _ in _UNARY_OPERATORS_X:
|
for op, _ in _UNARY_OPERATORS_X:
|
||||||
if not expr.startswith(op):
|
if not expr.startswith(op):
|
||||||
continue
|
continue
|
||||||
operand = expr[len(op):]
|
operand = expr[len(op):]
|
||||||
if not operand or operand[0] != ' ':
|
if not operand or (op.isalpha() and operand[0] != ' '):
|
||||||
continue
|
continue
|
||||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
separated = self._separate_at_op(operand, max_split=1)
|
||||||
if op_result:
|
if separated:
|
||||||
return op_result[0], should_return
|
next_op, separated, right_expr = separated
|
||||||
|
separated.append(right_expr)
|
||||||
|
operand = next_op.join(separated)
|
||||||
|
return self._eval_operator(op, operand, '', expr, local_vars, allow_recursion), should_return
|
||||||
|
|
||||||
if expr.startswith('{'):
|
if expr.startswith('{'):
|
||||||
inner, outer = self._separate_at_paren(expr)
|
inner, outer = self._separate_at_paren(expr)
|
||||||
@ -1010,15 +1089,18 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
m = re.match(r'''(?x)
|
m = re.match(r'''(?x)
|
||||||
(?P<assign>
|
(?P<assign>
|
||||||
(?P<out>{_NAME_RE})(?:\[(?P<out_idx>(?:.+?\]\s*\[)*.+?)\])?\s*
|
(?P<out>{_NAME_RE})(?P<out_idx>(?:\[{_NESTED_BRACKETS}\])+)?\s*
|
||||||
(?P<op>{_OPERATOR_RE})?
|
(?P<op>{_OPERATOR_RE})?
|
||||||
=(?!=)(?P<expr>.*)$
|
=(?!=)(?P<expr>.*)$
|
||||||
)|(?P<return>
|
)|(?P<return>
|
||||||
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
|
||||||
)|(?P<indexing>
|
|
||||||
(?P<in>{_NAME_RE})\[(?P<in_idx>(?:.+?\]\s*\[)*.+?)\]$
|
|
||||||
)|(?P<attribute>
|
)|(?P<attribute>
|
||||||
(?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
|
(?P<var>{_NAME_RE})(?:
|
||||||
|
(?P<nullish>\?)?\.(?P<member>[^(]+)|
|
||||||
|
\[(?P<member2>{_NESTED_BRACKETS})\]
|
||||||
|
)\s*
|
||||||
|
)|(?P<indexing>
|
||||||
|
(?P<in>{_NAME_RE})(?P<in_idx>\[.+\])$
|
||||||
)|(?P<function>
|
)|(?P<function>
|
||||||
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
||||||
)'''.format(**globals()), expr)
|
)'''.format(**globals()), expr)
|
||||||
@ -1033,10 +1115,11 @@ class JSInterpreter(object):
|
|||||||
elif left_val in (None, JS_Undefined):
|
elif left_val in (None, JS_Undefined):
|
||||||
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
|
raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr)
|
||||||
|
|
||||||
indexes = re.split(r'\]\s*\[', m.group('out_idx'))
|
indexes = md['out_idx']
|
||||||
for i, idx in enumerate(indexes, 1):
|
while indexes:
|
||||||
|
idx, indexes = self._separate_at_paren(indexes)
|
||||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||||
if i < len(indexes):
|
if indexes:
|
||||||
left_val = self._index(left_val, idx)
|
left_val = self._index(left_val, idx)
|
||||||
if isinstance(idx, float):
|
if isinstance(idx, float):
|
||||||
idx = int(idx)
|
idx = int(idx)
|
||||||
@ -1081,14 +1164,17 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
if md.get('indexing'):
|
if md.get('indexing'):
|
||||||
val = local_vars[m.group('in')]
|
val = local_vars[m.group('in')]
|
||||||
for idx in re.split(r'\]\s*\[', m.group('in_idx')):
|
indexes = m.group('in_idx')
|
||||||
|
while indexes:
|
||||||
|
idx, indexes = self._separate_at_paren(indexes)
|
||||||
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
idx = self.interpret_expression(idx, local_vars, allow_recursion)
|
||||||
val = self._index(val, idx)
|
val = self._index(val, idx)
|
||||||
return val, should_return
|
return val, should_return
|
||||||
|
|
||||||
op_result = self.handle_operators(expr, local_vars, allow_recursion)
|
separated = self._separate_at_op(expr)
|
||||||
if op_result:
|
if separated:
|
||||||
return op_result[0], should_return
|
op, separated, right_expr = separated
|
||||||
|
return self._eval_operator(op, op.join(separated), right_expr, expr, local_vars, allow_recursion), should_return
|
||||||
|
|
||||||
if md.get('attribute'):
|
if md.get('attribute'):
|
||||||
variable, member, nullish = m.group('var', 'member', 'nullish')
|
variable, member, nullish = m.group('var', 'member', 'nullish')
|
||||||
@ -1109,13 +1195,15 @@ class JSInterpreter(object):
|
|||||||
def eval_method(variable, member):
|
def eval_method(variable, member):
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
if Debugger.ENABLED:
|
if Debugger.ENABLED:
|
||||||
Debugger.write(self.interpret_expression('[{}]'.format(arg_str), local_vars, allow_recursion))
|
Debugger.write(self.interpret_expression('[{0}]'.format(arg_str), local_vars, allow_recursion))
|
||||||
return
|
return
|
||||||
types = {
|
types = {
|
||||||
'String': compat_str,
|
'String': compat_str,
|
||||||
'Math': float,
|
'Math': float,
|
||||||
'Array': list,
|
'Array': list,
|
||||||
'Date': self.JS_Date,
|
'Date': self.JS_Date,
|
||||||
|
'RegExp': self.JS_RegExp,
|
||||||
|
# 'Error': self.Exception, # has no std static methods
|
||||||
}
|
}
|
||||||
obj = local_vars.get(variable)
|
obj = local_vars.get(variable)
|
||||||
if obj in (JS_Undefined, None):
|
if obj in (JS_Undefined, None):
|
||||||
@ -1123,7 +1211,7 @@ class JSInterpreter(object):
|
|||||||
if obj is JS_Undefined:
|
if obj is JS_Undefined:
|
||||||
try:
|
try:
|
||||||
if variable not in self._objects:
|
if variable not in self._objects:
|
||||||
self._objects[variable] = self.extract_object(variable)
|
self._objects[variable] = self.extract_object(variable, local_vars)
|
||||||
obj = self._objects[variable]
|
obj = self._objects[variable]
|
||||||
except self.Exception:
|
except self.Exception:
|
||||||
if not nullish:
|
if not nullish:
|
||||||
@ -1134,7 +1222,7 @@ class JSInterpreter(object):
|
|||||||
|
|
||||||
# Member access
|
# Member access
|
||||||
if arg_str is None:
|
if arg_str is None:
|
||||||
return self._index(obj, member)
|
return self._index(obj, member, nullish)
|
||||||
|
|
||||||
# Function call
|
# Function call
|
||||||
argvals = [
|
argvals = [
|
||||||
@ -1277,7 +1365,8 @@ class JSInterpreter(object):
|
|||||||
assertion(len(argvals) == 2, 'takes exactly two arguments')
|
assertion(len(argvals) == 2, 'takes exactly two arguments')
|
||||||
# TODO: argvals[1] callable, other Py vs JS edge cases
|
# TODO: argvals[1] callable, other Py vs JS edge cases
|
||||||
if isinstance(argvals[0], self.JS_RegExp):
|
if isinstance(argvals[0], self.JS_RegExp):
|
||||||
count = 0 if argvals[0].flags & self.JS_RegExp.RE_FLAGS['g'] else 1
|
# access JS member with Py reserved name
|
||||||
|
count = 0 if self._index(argvals[0], 'global') else 1
|
||||||
assertion(member != 'replaceAll' or count == 0,
|
assertion(member != 'replaceAll' or count == 0,
|
||||||
'replaceAll must be called with a global RegExp')
|
'replaceAll must be called with a global RegExp')
|
||||||
return argvals[0].sub(argvals[1], obj, count=count)
|
return argvals[0].sub(argvals[1], obj, count=count)
|
||||||
@ -1318,7 +1407,7 @@ class JSInterpreter(object):
|
|||||||
for v in self._separate(list_txt):
|
for v in self._separate(list_txt):
|
||||||
yield self.interpret_expression(v, local_vars, allow_recursion)
|
yield self.interpret_expression(v, local_vars, allow_recursion)
|
||||||
|
|
||||||
def extract_object(self, objname):
|
def extract_object(self, objname, *global_stack):
|
||||||
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
_FUNC_NAME_RE = r'''(?:{n}|"{n}"|'{n}')'''.format(n=_NAME_RE)
|
||||||
obj = {}
|
obj = {}
|
||||||
fields = next(filter(None, (
|
fields = next(filter(None, (
|
||||||
@ -1339,7 +1428,8 @@ class JSInterpreter(object):
|
|||||||
fields):
|
fields):
|
||||||
argnames = self.build_arglist(f.group('args'))
|
argnames = self.build_arglist(f.group('args'))
|
||||||
name = remove_quotes(f.group('key'))
|
name = remove_quotes(f.group('key'))
|
||||||
obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), 'F<{0}>'.format(name))
|
obj[name] = function_with_repr(
|
||||||
|
self.build_function(argnames, f.group('code'), *global_stack), 'F<{0}>'.format(name))
|
||||||
|
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
@ -4204,12 +4204,16 @@ def lowercase_escape(s):
|
|||||||
s)
|
s)
|
||||||
|
|
||||||
|
|
||||||
def escape_rfc3986(s):
|
def escape_rfc3986(s, safe=None):
|
||||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
s = _encode_compat_str(s, 'utf-8')
|
s = _encode_compat_str(s, 'utf-8')
|
||||||
|
if safe is not None:
|
||||||
|
safe = _encode_compat_str(safe, 'utf-8')
|
||||||
|
if safe is None:
|
||||||
|
safe = b"%/;:@&=+$,!~*'()?#[]"
|
||||||
# ensure unicode: after quoting, it can always be converted
|
# ensure unicode: after quoting, it can always be converted
|
||||||
return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
|
return compat_str(compat_urllib_parse.quote(s, safe))
|
||||||
|
|
||||||
|
|
||||||
def escape_url(url):
|
def escape_url(url):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2021.12.17'
|
__version__ = '2025.04.07'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user