Compare commits

...

27 Commits

Author SHA1 Message Date
Valter Vicente
d7ea1fcc17
Merge 1058c29c74453e80ad918889a0101894a370c9b7 into da7223d4aa42ff9fc680b0951d043dd03cec2d30 2025-03-22 07:11:32 +08:00
dirkf
da7223d4aa [YouTube] Improve support for tce-style player JS
* improve extraction of global "useful data" Array from player JS
* also handle tv-player and add tests: thx seproDev (yt-dlp/yt-dlp#12684)

Co-Authored-By: sepro <sepro@sepr0.com>
2025-03-21 16:26:25 +00:00
dirkf
37c2440d6a [YouTube] Update player client data
thx seproDev (yt-dlp/yt-dlp#12603)

Co-authored-by: sepro <sepro@sepr0.com>
2025-03-21 16:13:24 +00:00
dirkf
420d53387c [JSInterp] Improve tests
* from yt-dlp/yt-dlp#12313
* also fix d7c2708
2025-03-11 02:00:24 +00:00
dirkf
32f89de92b [YouTube] Update TVHTML5 client parameters
* resolves #33078
2025-03-11 02:00:24 +00:00
dirkf
283dca56fe [YouTube] Initially support tce-style player JS
* resolves #33079
2025-03-11 02:00:24 +00:00
dirkf
422b1b31cf [YouTube] Temporarily redirect from tce-style player JS 2025-03-11 02:00:24 +00:00
dirkf
1dc27e1c3b [JSInterp] Make indexing error handling more conformant
* by default TypeError -> undefined, else raise
* set allow_undefined=True/False to override
2025-03-11 02:00:24 +00:00
dirkf
af049e309b [JSInterp] Handle undefined, etc, passed to JS_RegExp and Exception 2025-03-11 02:00:24 +00:00
dirkf
94849bc997 [JSInterp] Improve Date processing
* add JS_Date class implementing JS Date
* support constructor args other than date string
* support static methods of Date
* Date objects are still automatically coerced to timestamp before using in JS.
2025-03-11 02:00:24 +00:00
dirkf
974c7d7f34 [compat] Fix inheriting from compat_collections_chain_map
* see ytdl-org/youtube-dl#33079#issuecomment-2704038049
2025-03-11 02:00:24 +00:00
dirkf
8738407d77 [compat] Support zstd Content-Encoding
* see RFC 8878 7.2
2025-03-11 02:00:24 +00:00
dirkf
cecaa18b80 [compat] Clean-up
* make workaround_optparse_bug9161 private
* add comments
* avoid leaving test objects behind
2025-03-11 02:00:24 +00:00
vallovic
1058c29c74 Make extractor compatible with Python 2.6+ 2021-10-31 11:16:05 +00:00
vallovic
6bde4a1377 Fix 'RTP Arquivos' downloads, allow subtitles download, more tests, general cleanup 2021-08-28 21:10:18 +01:00
vallovic
89a75524e6 Merge remote-tracking branch 'upstream/master' 2021-08-28 17:33:40 +01:00
vallovic
31595de72b Merge remote-tracking branch 'upstream/master' 2021-05-13 21:33:03 +01:00
vallovic
5a7243b741 Working around new URLs 2021-05-06 23:20:20 +01:00
vallovic
d30180d4f3 Consider multi-part videos in filename output 2021-04-23 23:02:04 +01:00
vallovic
ff47d11269 Ignore EVEN more comments in RTP source code 2021-04-21 16:24:26 +01:00
vallovic
7f40887b29 Ignore more comments in RTP source code 2021-04-13 21:03:59 +01:00
vallovic
c1a696ee40 Merge remote-tracking branch 'upstream/master' 2021-04-13 13:05:15 +01:00
vallovic
b955e0a5dc RTP devs like to try out different approaches 2021-02-21 14:05:42 +00:00
vallovic
030a5713e6 Merge remote-tracking branch 'upstream/master' 2021-02-21 11:00:40 +00:00
vallovic
a85625977d 'Estudo em Casa' wasn't working since RTP has a lot of ways of dealing with their code 2021-02-20 20:42:33 +00:00
vallovic
fd733f52f5 Merge remote-tracking branch 'upstream/master' 2021-02-20 20:40:33 +00:00
vallovic
35779eda7a Dealing with RTP latest changes 2021-02-19 14:10:10 +00:00
7 changed files with 471 additions and 94 deletions

View File

@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import math import math
import re import re
import time
from youtube_dl.compat import compat_str as str from youtube_dl.compat import compat_str as str
from youtube_dl.jsinterp import JS_Undefined, JSInterpreter from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
@ -208,6 +209,34 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT']) self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
# epoch 0 # epoch 0
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC']) self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
# undefined
self._test(jsi, NaN, args=[JS_Undefined])
# y,m,d, ... - may fail with older dates lacking DST data
jsi = JSInterpreter(
'function f() { return new Date(%s); }'
% ('2024, 5, 29, 2, 52, 12, 42',))
self._test(jsi, (
1719625932042 # UK value
+ (
+ 3600 # back to GMT
+ (time.altzone if time.daylight # host's DST
else time.timezone)
) * 1000))
# no arg
self.assertAlmostEqual(JSInterpreter(
'function f() { return new Date() - 0; }').call_function('f'),
time.time() * 1000, delta=100)
# Date.now()
self.assertAlmostEqual(JSInterpreter(
'function f() { return Date.now(); }').call_function('f'),
time.time() * 1000, delta=100)
# Date.parse()
jsi = JSInterpreter('function f(dt) { return Date.parse(dt); }')
self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
# Date.UTC()
jsi = JSInterpreter('function f() { return Date.UTC(%s); }'
% ('1970, 0, 1, 0, 0, 0, 0',))
self._test(jsi, 0)
def test_call(self): def test_call(self):
jsi = JSInterpreter(''' jsi = JSInterpreter('''
@ -463,6 +492,14 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return NaN << 42}', 0) self._test('function f(){return NaN << 42}', 0)
self._test('function f(){return "21.9" << 1}', 42) self._test('function f(){return "21.9" << 1}', 42)
self._test('function f(){return 21 << 4294967297}', 42) self._test('function f(){return 21 << 4294967297}', 42)
self._test('function f(){return true << "5";}', 32)
self._test('function f(){return true << true;}', 2)
self._test('function f(){return "19" & "21.9";}', 17)
self._test('function f(){return "19" & false;}', 0)
self._test('function f(){return "11.0" >> "2.1";}', 2)
self._test('function f(){return 5 ^ 9;}', 12)
self._test('function f(){return 0.0 << NaN}', 0)
self._test('function f(){return null << undefined}', 0)
def test_negative(self): def test_negative(self):
self._test('function f(){return 2 * -2.0 ;}', -4) self._test('function f(){return 2 * -2.0 ;}', -4)

View File

@ -223,6 +223,42 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg', 'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
), ),
(
'https://www.youtube.com/s/player/f6e09c70/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'W9HJZKktxuYoDTqW', 'larxUlagTRAcSw',
),
(
'https://www.youtube.com/s/player/e7567ecf/player_ias_tce.vflset/en_US/base.js',
'Sy4aDGc0VpYRR9ew_', '5UPOT1VhoZxNLQ',
),
(
'https://www.youtube.com/s/player/d50f54ef/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'XFTb2HoeOE5MHg',
),
(
'https://www.youtube.com/s/player/074a8365/player_ias_tce.vflset/en_US/base.js',
'Ha7507LzRmH3Utygtj', 'ufTsrE0IVYrkl8v',
),
(
'https://www.youtube.com/s/player/643afba4/player_ias.vflset/en_US/base.js',
'N5uAlLqm0eg1GyHO', 'dCBQOejdq5s-ww',
),
(
'https://www.youtube.com/s/player/69f581a5/tv-player-ias.vflset/tv-player-ias.js',
'-qIP447rVlTTwaZjY', 'KNcGOksBAvwqQg',
),
(
'https://www.youtube.com/s/player/643afba4/tv-player-ias.vflset/tv-player-ias.js',
'ir9-V6cdbCiyKxhr', '2PL7ZDYAALMfmA',
),
] ]
@ -284,7 +320,7 @@ def t_factory(name, sig_func, url_pattern):
def signature(jscode, sig_input): def signature(jscode, sig_input):
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode) func = YoutubeIE(FakeYDL({'cachedir': False}))._parse_sig_js(jscode)
src_sig = ( src_sig = (
compat_str(string.printable[:sig_input]) compat_str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input) if isinstance(sig_input, int) else sig_input)
@ -292,9 +328,10 @@ def signature(jscode, sig_input):
def n_sig(jscode, sig_input): def n_sig(jscode, sig_input):
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) ie = YoutubeIE(FakeYDL({'cachedir': False}))
return JSInterpreter(jscode).call_function( jsi = JSInterpreter(jscode)
funcname, sig_input, _ytdl_do_not_return=sig_input) jsi, _, func_code = ie._extract_n_function_code_jsi(sig_input, jsi)
return ie._extract_n_function_from_code(jsi, func_code)(sig_input)
make_sig_test = t_factory( make_sig_test = t_factory(

View File

@ -18,7 +18,7 @@ from .compat import (
compat_getpass, compat_getpass,
compat_register_utf8, compat_register_utf8,
compat_shlex_split, compat_shlex_split,
workaround_optparse_bug9161, _workaround_optparse_bug9161,
) )
from .utils import ( from .utils import (
_UnsafeExtensionError, _UnsafeExtensionError,
@ -50,7 +50,7 @@ def _real_main(argv=None):
# Compatibility fix for Windows # Compatibility fix for Windows
compat_register_utf8() compat_register_utf8()
workaround_optparse_bug9161() _workaround_optparse_bug9161()
setproctitle('youtube-dl') setproctitle('youtube-dl')

View File

@ -16,7 +16,6 @@ import os
import platform import platform
import re import re
import shlex import shlex
import shutil
import socket import socket
import struct import struct
import subprocess import subprocess
@ -24,11 +23,15 @@ import sys
import types import types
import xml.etree.ElementTree import xml.etree.ElementTree
_IDENTITY = lambda x: x
# naming convention # naming convention
# 'compat_' + Python3_name.replace('.', '_') # 'compat_' + Python3_name.replace('.', '_')
# other aliases exist for convenience and/or legacy # other aliases exist for convenience and/or legacy
# wrap disposable test values in type() to reclaim storage
# deal with critical unicode/str things first # deal with critical unicode/str things first:
# compat_str, compat_basestring, compat_chr
try: try:
# Python 2 # Python 2
compat_str, compat_basestring, compat_chr = ( compat_str, compat_basestring, compat_chr = (
@ -39,18 +42,23 @@ except NameError:
str, (str, bytes), chr str, (str, bytes), chr
) )
# casefold
# compat_casefold
try: try:
compat_str.casefold compat_str.casefold
compat_casefold = lambda s: s.casefold() compat_casefold = lambda s: s.casefold()
except AttributeError: except AttributeError:
from .casefold import _casefold as compat_casefold from .casefold import _casefold as compat_casefold
# compat_collections_abc
try: try:
import collections.abc as compat_collections_abc import collections.abc as compat_collections_abc
except ImportError: except ImportError:
import collections as compat_collections_abc import collections as compat_collections_abc
# compat_urllib_request
try: try:
import urllib.request as compat_urllib_request import urllib.request as compat_urllib_request
except ImportError: # Python 2 except ImportError: # Python 2
@ -79,11 +87,15 @@ except TypeError:
_add_init_method_arg(compat_urllib_request.Request) _add_init_method_arg(compat_urllib_request.Request)
del _add_init_method_arg del _add_init_method_arg
# compat_urllib_error
try: try:
import urllib.error as compat_urllib_error import urllib.error as compat_urllib_error
except ImportError: # Python 2 except ImportError: # Python 2
import urllib2 as compat_urllib_error import urllib2 as compat_urllib_error
# compat_urllib_parse
try: try:
import urllib.parse as compat_urllib_parse import urllib.parse as compat_urllib_parse
except ImportError: # Python 2 except ImportError: # Python 2
@ -98,17 +110,23 @@ except ImportError: # Python 2
compat_urlparse = compat_urllib_parse compat_urlparse = compat_urllib_parse
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
# compat_urllib_response
try: try:
import urllib.response as compat_urllib_response import urllib.response as compat_urllib_response
except ImportError: # Python 2 except ImportError: # Python 2
import urllib as compat_urllib_response import urllib as compat_urllib_response
# compat_urllib_response.addinfourl
try: try:
compat_urllib_response.addinfourl.status compat_urllib_response.addinfourl.status
except AttributeError: except AttributeError:
# .getcode() is deprecated in Py 3. # .getcode() is deprecated in Py 3.
compat_urllib_response.addinfourl.status = property(lambda self: self.getcode()) compat_urllib_response.addinfourl.status = property(lambda self: self.getcode())
# compat_http_cookiejar
try: try:
import http.cookiejar as compat_cookiejar import http.cookiejar as compat_cookiejar
except ImportError: # Python 2 except ImportError: # Python 2
@ -127,12 +145,16 @@ else:
compat_cookiejar_Cookie = compat_cookiejar.Cookie compat_cookiejar_Cookie = compat_cookiejar.Cookie
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
# compat_http_cookies
try: try:
import http.cookies as compat_cookies import http.cookies as compat_cookies
except ImportError: # Python 2 except ImportError: # Python 2
import Cookie as compat_cookies import Cookie as compat_cookies
compat_http_cookies = compat_cookies compat_http_cookies = compat_cookies
# compat_http_cookies_SimpleCookie
if sys.version_info[0] == 2 or sys.version_info < (3, 3): if sys.version_info[0] == 2 or sys.version_info < (3, 3):
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie): class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
def load(self, rawdata): def load(self, rawdata):
@ -155,11 +177,15 @@ else:
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
# compat_html_entities, probably useless now
try: try:
import html.entities as compat_html_entities import html.entities as compat_html_entities
except ImportError: # Python 2 except ImportError: # Python 2
import htmlentitydefs as compat_html_entities import htmlentitydefs as compat_html_entities
# compat_html_entities_html5
try: # Python >= 3.3 try: # Python >= 3.3
compat_html_entities_html5 = compat_html_entities.html5 compat_html_entities_html5 = compat_html_entities.html5
except AttributeError: except AttributeError:
@ -2408,18 +2434,24 @@ except AttributeError:
# Py < 3.1 # Py < 3.1
compat_http_client.HTTPResponse.getcode = lambda self: self.status compat_http_client.HTTPResponse.getcode = lambda self: self.status
# compat_urllib_HTTPError
try: try:
from urllib.error import HTTPError as compat_HTTPError from urllib.error import HTTPError as compat_HTTPError
except ImportError: # Python 2 except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError from urllib2 import HTTPError as compat_HTTPError
compat_urllib_HTTPError = compat_HTTPError compat_urllib_HTTPError = compat_HTTPError
# compat_urllib_request_urlretrieve
try: try:
from urllib.request import urlretrieve as compat_urlretrieve from urllib.request import urlretrieve as compat_urlretrieve
except ImportError: # Python 2 except ImportError: # Python 2
from urllib import urlretrieve as compat_urlretrieve from urllib import urlretrieve as compat_urlretrieve
compat_urllib_request_urlretrieve = compat_urlretrieve compat_urllib_request_urlretrieve = compat_urlretrieve
# compat_html_parser_HTMLParser, compat_html_parser_HTMLParseError
try: try:
from HTMLParser import ( from HTMLParser import (
HTMLParser as compat_HTMLParser, HTMLParser as compat_HTMLParser,
@ -2432,22 +2464,33 @@ except ImportError: # Python 3
# HTMLParseError was deprecated in Python 3.3 and removed in # HTMLParseError was deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exception handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass
compat_html_parser_HTMLParser = compat_HTMLParser compat_html_parser_HTMLParser = compat_HTMLParser
compat_html_parser_HTMLParseError = compat_HTMLParseError compat_html_parser_HTMLParseError = compat_HTMLParseError
# compat_subprocess_get_DEVNULL
try: try:
_DEVNULL = subprocess.DEVNULL _DEVNULL = subprocess.DEVNULL
compat_subprocess_get_DEVNULL = lambda: _DEVNULL compat_subprocess_get_DEVNULL = lambda: _DEVNULL
except AttributeError: except AttributeError:
compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
# compat_http_server
try: try:
import http.server as compat_http_server import http.server as compat_http_server
except ImportError: except ImportError:
import BaseHTTPServer as compat_http_server import BaseHTTPServer as compat_http_server
# compat_urllib_parse_unquote_to_bytes,
# compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus,
# compat_urllib_parse_urlencode,
# compat_urllib_parse_parse_qs
try: try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote from urllib.parse import unquote as compat_urllib_parse_unquote
@ -2598,6 +2641,8 @@ except ImportError: # Python 2
compat_urllib_parse_parse_qs = compat_parse_qs compat_urllib_parse_parse_qs = compat_parse_qs
# compat_urllib_request_DataHandler
try: try:
from urllib.request import DataHandler as compat_urllib_request_DataHandler from urllib.request import DataHandler as compat_urllib_request_DataHandler
except ImportError: # Python < 3.4 except ImportError: # Python < 3.4
@ -2632,16 +2677,20 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url) return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# compat_xml_etree_ElementTree_ParseError
try: try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6 except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error from xml.parsers.expat import ExpatError as compat_xml_parse_error
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
etree = xml.etree.ElementTree
# compat_xml_etree_ElementTree_Element
_etree = xml.etree.ElementTree
class _TreeBuilder(etree.TreeBuilder): class _TreeBuilder(_etree.TreeBuilder):
def doctype(self, name, pubid, system): def doctype(self, name, pubid, system):
pass pass
@ -2650,7 +2699,7 @@ try:
# xml.etree.ElementTree.Element is a method in Python <=2.6 and # xml.etree.ElementTree.Element is a method in Python <=2.6 and
# the following will crash with: # the following will crash with:
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
isinstance(None, etree.Element) isinstance(None, _etree.Element)
from xml.etree.ElementTree import Element as compat_etree_Element from xml.etree.ElementTree import Element as compat_etree_Element
except TypeError: # Python <=2.6 except TypeError: # Python <=2.6
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
@ -2658,12 +2707,12 @@ compat_xml_etree_ElementTree_Element = compat_etree_Element
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
def compat_etree_fromstring(text): def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) return _etree.XML(text, parser=_etree.XMLParser(target=_TreeBuilder()))
else: else:
# python 2.x tries to encode unicode strings with ascii (see the # python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method) # XMLParser._fixtext method)
try: try:
_etree_iter = etree.Element.iter _etree_iter = _etree.Element.iter
except AttributeError: # Python <=2.6 except AttributeError: # Python <=2.6
def _etree_iter(root): def _etree_iter(root):
for el in root.findall('*'): for el in root.findall('*'):
@ -2675,27 +2724,29 @@ else:
# 2.7 source # 2.7 source
def _XML(text, parser=None): def _XML(text, parser=None):
if not parser: if not parser:
parser = etree.XMLParser(target=_TreeBuilder()) parser = _etree.XMLParser(target=_TreeBuilder())
parser.feed(text) parser.feed(text)
return parser.close() return parser.close()
def _element_factory(*args, **kwargs): def _element_factory(*args, **kwargs):
el = etree.Element(*args, **kwargs) el = _etree.Element(*args, **kwargs)
for k, v in el.items(): for k, v in el.items():
if isinstance(v, bytes): if isinstance(v, bytes):
el.set(k, v.decode('utf-8')) el.set(k, v.decode('utf-8'))
return el return el
def compat_etree_fromstring(text): def compat_etree_fromstring(text):
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) doc = _XML(text, parser=_etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
for el in _etree_iter(doc): for el in _etree_iter(doc):
if el.text is not None and isinstance(el.text, bytes): if el.text is not None and isinstance(el.text, bytes):
el.text = el.text.decode('utf-8') el.text = el.text.decode('utf-8')
return doc return doc
if hasattr(etree, 'register_namespace'):
compat_etree_register_namespace = etree.register_namespace # compat_xml_etree_register_namespace
else: try:
compat_etree_register_namespace = _etree.register_namespace
except AttributeError:
def compat_etree_register_namespace(prefix, uri): def compat_etree_register_namespace(prefix, uri):
"""Register a namespace prefix. """Register a namespace prefix.
The registry is global, and any existing mapping for either the The registry is global, and any existing mapping for either the
@ -2704,14 +2755,16 @@ else:
attributes in this namespace will be serialized with prefix if possible. attributes in this namespace will be serialized with prefix if possible.
ValueError is raised if prefix is reserved or is invalid. ValueError is raised if prefix is reserved or is invalid.
""" """
if re.match(r"ns\d+$", prefix): if re.match(r'ns\d+$', prefix):
raise ValueError("Prefix format reserved for internal use") raise ValueError('Prefix format reserved for internal use')
for k, v in list(etree._namespace_map.items()): for k, v in list(_etree._namespace_map.items()):
if k == uri or v == prefix: if k == uri or v == prefix:
del etree._namespace_map[k] del _etree._namespace_map[k]
etree._namespace_map[uri] = prefix _etree._namespace_map[uri] = prefix
compat_xml_etree_register_namespace = compat_etree_register_namespace compat_xml_etree_register_namespace = compat_etree_register_namespace
# compat_xpath, compat_etree_iterfind
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
# Here comes the crazy part: In 2.6, if the xpath is a unicode, # Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . ! # .//node does not match if a node is a direct child of . !
@ -2898,7 +2951,6 @@ if sys.version_info < (2, 7):
def __init__(self, root): def __init__(self, root):
self.root = root self.root = root
##
# Generate all matching objects. # Generate all matching objects.
def compat_etree_iterfind(elem, path, namespaces=None): def compat_etree_iterfind(elem, path, namespaces=None):
@ -2933,13 +2985,15 @@ if sys.version_info < (2, 7):
else: else:
compat_xpath = lambda xpath: xpath
compat_etree_iterfind = lambda element, match: element.iterfind(match) compat_etree_iterfind = lambda element, match: element.iterfind(match)
compat_xpath = _IDENTITY
# compat_os_name
compat_os_name = os._name if os.name == 'java' else os.name compat_os_name = os._name if os.name == 'java' else os.name
# compat_shlex_quote
if compat_os_name == 'nt': if compat_os_name == 'nt':
def compat_shlex_quote(s): def compat_shlex_quote(s):
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
@ -2954,6 +3008,7 @@ else:
return "'" + s.replace("'", "'\"'\"'") + "'" return "'" + s.replace("'", "'\"'\"'") + "'"
# compat_shlex.split
try: try:
args = shlex.split('中文') args = shlex.split('中文')
assert (isinstance(args, list) assert (isinstance(args, list)
@ -2969,6 +3024,7 @@ except (AssertionError, UnicodeEncodeError):
return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix))) return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
# compat_ord
def compat_ord(c): def compat_ord(c):
if isinstance(c, int): if isinstance(c, int):
return c return c
@ -2976,6 +3032,7 @@ def compat_ord(c):
return ord(c) return ord(c)
# compat_getenv, compat_os_path_expanduser, compat_setenv
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
compat_getenv = os.getenv compat_getenv = os.getenv
compat_expanduser = os.path.expanduser compat_expanduser = os.path.expanduser
@ -3063,6 +3120,7 @@ else:
compat_os_path_expanduser = compat_expanduser compat_os_path_expanduser = compat_expanduser
# compat_os_path_realpath
if compat_os_name == 'nt' and sys.version_info < (3, 8): if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links # os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949) # prior to Python 3.8 (see https://bugs.python.org/issue9949)
@ -3076,6 +3134,7 @@ else:
compat_os_path_realpath = compat_realpath compat_os_path_realpath = compat_realpath
# compat_print
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
def compat_print(s): def compat_print(s):
from .utils import preferredencoding from .utils import preferredencoding
@ -3086,6 +3145,7 @@ else:
print(s) print(s)
# compat_getpass_getpass
if sys.version_info < (3, 0) and sys.platform == 'win32': if sys.version_info < (3, 0) and sys.platform == 'win32':
def compat_getpass(prompt, *args, **kwargs): def compat_getpass(prompt, *args, **kwargs):
if isinstance(prompt, compat_str): if isinstance(prompt, compat_str):
@ -3098,22 +3158,22 @@ else:
compat_getpass_getpass = compat_getpass compat_getpass_getpass = compat_getpass
# compat_input
try: try:
compat_input = raw_input compat_input = raw_input
except NameError: # Python 3 except NameError: # Python 3
compat_input = input compat_input = input
# compat_kwargs
# Python < 2.6.5 require kwargs to be bytes # Python < 2.6.5 require kwargs to be bytes
try: try:
def _testfunc(x): (lambda x: x)(**{'x': 0})
pass
_testfunc(**{'x': 0})
except TypeError: except TypeError:
def compat_kwargs(kwargs): def compat_kwargs(kwargs):
return dict((bytes(k), v) for k, v in kwargs.items()) return dict((bytes(k), v) for k, v in kwargs.items())
else: else:
compat_kwargs = lambda kwargs: kwargs compat_kwargs = _IDENTITY
# compat_numeric_types # compat_numeric_types
@ -3132,6 +3192,8 @@ except NameError: # Python 3
# compat_int # compat_int
compat_int = compat_integer_types[-1] compat_int = compat_integer_types[-1]
# compat_socket_create_connection
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None): def compat_socket_create_connection(address, timeout, source_address=None):
host, port = address host, port = address
@ -3158,6 +3220,7 @@ else:
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
# compat_contextlib_suppress
try: try:
from contextlib import suppress as compat_contextlib_suppress from contextlib import suppress as compat_contextlib_suppress
except ImportError: except ImportError:
@ -3200,12 +3263,12 @@ except AttributeError:
# repeated .close() is OK, but just in case # repeated .close() is OK, but just in case
with compat_contextlib_suppress(EnvironmentError): with compat_contextlib_suppress(EnvironmentError):
f.close() f.close()
popen.wait() popen.wait()
# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 # Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken # See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161(): def _workaround_optparse_bug9161():
op = optparse.OptionParser() op = optparse.OptionParser()
og = optparse.OptionGroup(op, 'foo') og = optparse.OptionGroup(op, 'foo')
try: try:
@ -3224,9 +3287,10 @@ def workaround_optparse_bug9161():
optparse.OptionGroup.add_option = _compat_add_option optparse.OptionGroup.add_option = _compat_add_option
if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 # compat_shutil_get_terminal_size
compat_get_terminal_size = shutil.get_terminal_size try:
else: from shutil import get_terminal_size as compat_get_terminal_size # Python >= 3.3
except ImportError:
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines']) _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
def compat_get_terminal_size(fallback=(80, 24)): def compat_get_terminal_size(fallback=(80, 24)):
@ -3256,27 +3320,33 @@ else:
columns = _columns columns = _columns
if lines is None or lines <= 0: if lines is None or lines <= 0:
lines = _lines lines = _lines
return _terminal_size(columns, lines) return _terminal_size(columns, lines)
compat_shutil_get_terminal_size = compat_get_terminal_size
# compat_itertools_count
try: try:
itertools.count(start=0, step=1) type(itertools.count(start=0, step=1))
compat_itertools_count = itertools.count compat_itertools_count = itertools.count
except TypeError: # Python 2.6 except TypeError: # Python 2.6 lacks step
def compat_itertools_count(start=0, step=1): def compat_itertools_count(start=0, step=1):
while True: while True:
yield start yield start
start += step start += step
# compat_tokenize_tokenize
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
from tokenize import tokenize as compat_tokenize_tokenize from tokenize import tokenize as compat_tokenize_tokenize
else: else:
from tokenize import generate_tokens as compat_tokenize_tokenize from tokenize import generate_tokens as compat_tokenize_tokenize
# compat_struct_pack, compat_struct_unpack, compat_Struct
try: try:
struct.pack('!I', 0) type(struct.pack('!I', 0))
except TypeError: except TypeError:
# In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
# See https://bugs.python.org/issue19099 # See https://bugs.python.org/issue19099
@ -3308,8 +3378,10 @@ else:
compat_Struct = struct.Struct compat_Struct = struct.Struct
# compat_map/filter() returning an iterator, supposedly the # builtins returning an iterator
# same versioning as for zip below
# compat_map, compat_filter
# supposedly the same versioning as for zip below
try: try:
from future_builtins import map as compat_map from future_builtins import map as compat_map
except ImportError: except ImportError:
@ -3326,6 +3398,7 @@ except ImportError:
except ImportError: except ImportError:
compat_filter = filter compat_filter = filter
# compat_zip
try: try:
from future_builtins import zip as compat_zip from future_builtins import zip as compat_zip
except ImportError: # not 2.6+ or is 3.x except ImportError: # not 2.6+ or is 3.x
@ -3335,6 +3408,7 @@ except ImportError: # not 2.6+ or is 3.x
compat_zip = zip compat_zip = zip
# compat_itertools_zip_longest
# method renamed between Py2/3 # method renamed between Py2/3
try: try:
from itertools import zip_longest as compat_itertools_zip_longest from itertools import zip_longest as compat_itertools_zip_longest
@ -3342,7 +3416,8 @@ except ImportError:
from itertools import izip_longest as compat_itertools_zip_longest from itertools import izip_longest as compat_itertools_zip_longest
# new class in collections # compat_collections_chain_map
# collections.ChainMap: new class
try: try:
from collections import ChainMap as compat_collections_chain_map from collections import ChainMap as compat_collections_chain_map
# Py3.3's ChainMap is deficient # Py3.3's ChainMap is deficient
@ -3398,19 +3473,22 @@ except ImportError:
def new_child(self, m=None, **kwargs): def new_child(self, m=None, **kwargs):
m = m or {} m = m or {}
m.update(kwargs) m.update(kwargs)
return compat_collections_chain_map(m, *self.maps) # support inheritance !
return type(self)(m, *self.maps)
@property @property
def parents(self): def parents(self):
return compat_collections_chain_map(*(self.maps[1:])) return type(self)(*(self.maps[1:]))
# compat_re_Pattern, compat_re_Match
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
compat_re_Pattern = type(re.compile('')) compat_re_Pattern = type(re.compile(''))
# and on the type of a match # and on the type of a match
compat_re_Match = type(re.match('a', 'a')) compat_re_Match = type(re.match('a', 'a'))
# compat_base64_b64decode
if sys.version_info < (3, 3): if sys.version_info < (3, 3):
def compat_b64decode(s, *args, **kwargs): def compat_b64decode(s, *args, **kwargs):
if isinstance(s, compat_str): if isinstance(s, compat_str):
@ -3422,6 +3500,7 @@ else:
compat_base64_b64decode = compat_b64decode compat_base64_b64decode = compat_b64decode
# compat_ctypes_WINFUNCTYPE
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0): if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2]. # names, see the original PyPy issue [1] and the youtube-dl one [2].
@ -3440,6 +3519,7 @@ else:
return ctypes.WINFUNCTYPE(*args, **kwargs) return ctypes.WINFUNCTYPE(*args, **kwargs)
# compat_open
if sys.version_info < (3, 0): if sys.version_info < (3, 0):
# open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None # open(file, mode='r', buffering=- 1, encoding=None, errors=None, newline=None, closefd=True) not: opener=None
def compat_open(file_, *args, **kwargs): def compat_open(file_, *args, **kwargs):
@ -3467,18 +3547,28 @@ except AttributeError:
def compat_datetime_timedelta_total_seconds(td): def compat_datetime_timedelta_total_seconds(td):
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
# optional decompression packages # optional decompression packages
# compat_brotli
# PyPi brotli package implements 'br' Content-Encoding # PyPi brotli package implements 'br' Content-Encoding
try: try:
import brotli as compat_brotli import brotli as compat_brotli
except ImportError: except ImportError:
compat_brotli = None compat_brotli = None
# compat_ncompress
# PyPi ncompress package implements 'compress' Content-Encoding # PyPi ncompress package implements 'compress' Content-Encoding
try: try:
import ncompress as compat_ncompress import ncompress as compat_ncompress
except ImportError: except ImportError:
compat_ncompress = None compat_ncompress = None
# compat_zstandard
# PyPi zstandard package implements 'zstd' Content-Encoding (RFC 8878 7.2)
try:
import zstandard as compat_zstandard
except ImportError:
compat_zstandard = None
legacy = [ legacy = [
'compat_HTMLParseError', 'compat_HTMLParseError',
@ -3495,6 +3585,7 @@ legacy = [
'compat_getpass', 'compat_getpass',
'compat_parse_qs', 'compat_parse_qs',
'compat_realpath', 'compat_realpath',
'compat_shlex_split',
'compat_urllib_parse_parse_qs', 'compat_urllib_parse_parse_qs',
'compat_urllib_parse_unquote', 'compat_urllib_parse_unquote',
'compat_urllib_parse_unquote_plus', 'compat_urllib_parse_unquote_plus',
@ -3508,8 +3599,6 @@ legacy = [
__all__ = [ __all__ = [
'compat_html_parser_HTMLParseError',
'compat_html_parser_HTMLParser',
'compat_Struct', 'compat_Struct',
'compat_base64_b64decode', 'compat_base64_b64decode',
'compat_basestring', 'compat_basestring',
@ -3518,13 +3607,9 @@ __all__ = [
'compat_chr', 'compat_chr',
'compat_collections_abc', 'compat_collections_abc',
'compat_collections_chain_map', 'compat_collections_chain_map',
'compat_datetime_timedelta_total_seconds',
'compat_http_cookiejar',
'compat_http_cookiejar_Cookie',
'compat_http_cookies',
'compat_http_cookies_SimpleCookie',
'compat_contextlib_suppress', 'compat_contextlib_suppress',
'compat_ctypes_WINFUNCTYPE', 'compat_ctypes_WINFUNCTYPE',
'compat_datetime_timedelta_total_seconds',
'compat_etree_fromstring', 'compat_etree_fromstring',
'compat_etree_iterfind', 'compat_etree_iterfind',
'compat_filter', 'compat_filter',
@ -3533,6 +3618,12 @@ __all__ = [
'compat_getpass_getpass', 'compat_getpass_getpass',
'compat_html_entities', 'compat_html_entities',
'compat_html_entities_html5', 'compat_html_entities_html5',
'compat_html_parser_HTMLParseError',
'compat_html_parser_HTMLParser',
'compat_http_cookiejar',
'compat_http_cookiejar_Cookie',
'compat_http_cookies',
'compat_http_cookies_SimpleCookie',
'compat_http_client', 'compat_http_client',
'compat_http_server', 'compat_http_server',
'compat_input', 'compat_input',
@ -3555,7 +3646,7 @@ __all__ = [
'compat_register_utf8', 'compat_register_utf8',
'compat_setenv', 'compat_setenv',
'compat_shlex_quote', 'compat_shlex_quote',
'compat_shlex_split', 'compat_shutil_get_terminal_size',
'compat_socket_create_connection', 'compat_socket_create_connection',
'compat_str', 'compat_str',
'compat_struct_pack', 'compat_struct_pack',
@ -3575,5 +3666,5 @@ __all__ = [
'compat_xml_etree_register_namespace', 'compat_xml_etree_register_namespace',
'compat_xpath', 'compat_xpath',
'compat_zip', 'compat_zip',
'workaround_optparse_bug9161', 'compat_zstandard',
] ]

View File

@ -3,25 +3,71 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext, determine_ext,
js_to_json, js_to_json,
) )
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
import re
class RTPIE(InfoExtractor): class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' _VALID_URL = r'https?://(?:(?:(?:www\.)?rtp\.pt/play/(?P<subarea>.*/)?p(?P<program_id>[0-9]+)/(?P<episode_id>e[0-9]+/)?)|(?:arquivos\.rtp\.pt/conteudos/))(?P<id>[^/?#]+)/?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'url': 'https://www.rtp.pt/play/p117/e563265/os-contemporaneos',
'md5': 'e736ce0c665e459ddb818546220b4ef8',
'info_dict': { 'info_dict': {
'id': 'e174042', 'id': 'os-contemporaneos',
'ext': 'mp3', 'ext': 'mp4',
'title': 'Paixões Cruzadas', 'title': 'Os Contemporâneos Episódio 1',
'description': 'As paixões musicais de António Cartaxo e António Macedo', 'description': 'Os Contemporâneos, um programa de humor com um olhar na sociedade portuguesa!',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.(jpg|png)'
}, },
}, { }, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'url': 'https://www.rtp.pt/play/p8157/e541212/telejornal',
'info_dict': {
'id': 'telejornal',
'ext': 'mp4',
'title': 'Telejornal de 01 Mai 2021 PARTE 1',
'description': 'A mais rigorosa seleção de notícias, todos os dias às 20h00. De segunda a domingo, João Adelino Faria e José Rodrigues dos Santos mostram-lhe o que de'
},
}, {
'url': 'https://www.rtp.pt/play/p6646/e457262/grande-entrevista',
'info_dict': {
'id': 'grande-entrevista',
'ext': 'mp4',
'title': 'Grande Entrevista Episódio 7 - de 19 Fev 2020',
'description': 'Bruno Nogueira - É um dos mais originais humoristas portugueses e de maior êxito! Bruno Nogueira na Grande Entrevista com Vítor Gonçalves.'
},
}, {
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/e539826/portugues-1-ano',
'info_dict': {
'id': 'portugues-1-ano',
'ext': 'mp4',
'title': 'Português - 1.º ano , aula 45 - 27 Abr 2021 - Estudo Em Casa - RTP',
'description': 'A História do Pedrito Coelho, de Beatrix Potter. O dígrafo \'lh\' - A História do Pedrito Coelho, de Beatrix Potter. O dígrafo \'lh\'.'
},
}, {
'url': 'https://www.rtp.pt/play/zigzag/p5449/e385973/banda-zig-zag',
'info_dict': {
'id': 'banda-zig-zag',
'ext': 'mp4',
'title': 'Banda Zig Zag Episódio 1 - Zig Zag Play - RTP',
'description': 'A Amizade é o Nosso Mel - Zig: é a menina que além de tocar também canta. Adora aprender palavras novas e adora ler. Gosta de fazer palavras cruzadas'
},
}, {
'url': 'https://arquivos.rtp.pt/conteudos/liga-dos-ultimos-152/',
'info_dict': {
'id': 'liga-dos-ultimos-152',
'ext': 'mp4',
'title': 'Liga dos Últimos RTP Arquivos',
'description': 'Magazine desportivo, com apresentação de Álvaro Costa e comentários em estúdio do professor Hernâni Gonçalves e do sociólogo João Nuno Coelho. Destaque para os jogos de futebol das equipas dos escalões secundários de Portugal, com momentos dos jogos: Agrário de Lamas vs Pampilhoense e Apúlia vs Fragoso.'
},
}, {
'url': 'https://www.rtp.pt/play/p510/aleixo-fm',
'only_matching': True, 'only_matching': True,
}] }]
@ -29,38 +75,115 @@ class RTPIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
'twitter:title', webpage, display_name='title', fatal=True)
config = self._parse_json(self._search_regex( # Remove JS multi-line comments from webpage source
r'(?s)RTPPlayer\(({.+?})\);', webpage, webpage = re.sub(r'(\/\*.*\*\/)', '', webpage, flags=re.DOTALL)
'player config'), video_id, js_to_json)
file_url = config['file'] title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
if 'Este episódio não se encontra disponível' in title:
raise ExtractorError('Episode unavailable', expected=True)
# Replace irrelevant text in title
title = re.sub(r' - ?RTP Play - RTP', '', title)
# Check if it's a video split in parts, if so add part number to title
part = self._html_search_regex(r'section\-parts.*<span.*>(.+?)</span>.*</ul>', webpage, 'part', default=None)
if part:
title = '{title} {part}'.format(title=title, part=part)
# Get JS object
js_object = self._search_regex(r'(?s)RTPPlayer *\( *({.+?}) *\);', webpage, 'player config')
json_string_for_config = ''
filekey_found = False
# Verify JS object since it isn't pure JSON and probably needs some fixing
for line in js_object.splitlines():
stripped_line = line.strip()
# If JS object key is 'fileKey'
if re.match('fileKey ?:', stripped_line):
filekey_found = True
if 'decodeURIComponent' in stripped_line:
# 1) The value is an encoded URL
encoded_url = re.match(r"[^[]*\[([^]]*)\]", stripped_line).groups()[0]
encoded_url = re.sub(r'[\s"\',]', '', encoded_url)
if 'atob' in stripped_line:
# Most of the times 'atob' approach is used but not always so we need to be sure
decoded_url = compat_b64decode(
compat_urllib_parse_unquote(
encoded_url)).decode('utf-8')
else:
# If no 'atob' we just need to unquote it
decoded_url = compat_urllib_parse_unquote(encoded_url)
# Insert the (relative) decoded URL in JSON
json_string_for_config += '\nfileKey: "{decoded_url}",'.format(decoded_url=decoded_url)
else:
# 2) ... or the value URL is not encoded so keep it that way
json_string_for_config += '\n{stripped_line}'.format(stripped_line=stripped_line)
elif (
not stripped_line.startswith("//")
and not re.match('.*extraSettings ?:', stripped_line)
and (not filekey_found or (filekey_found and not re.match('file ?:', stripped_line)))
):
# Ignore commented lines and 'extraSettings'. Also ignore 'file' if 'fileKey' already exists
json_string_for_config += '\n{stripped_line}'.format(stripped_line=stripped_line)
# Finally send pure JSON string for JSON parsing
config = self._parse_json(json_string_for_config, video_id, js_to_json)
if 'fileKey' in config:
# 'fileKey' has priority over 'file' on our end
file_url = config['fileKey']
elif 'file' in config:
# 'RTP Arquivos' still uses old regular non-encoded 'file' key
file_url = config['file']
else:
raise ExtractorError('No valid media source found in page')
ext = determine_ext(file_url) ext = determine_ext(file_url)
if ext == 'm3u8':
file_key = config.get('fileKey') if ext == 'mp4':
# Due to recent changes, we need to hardcode the URL like this and download it using 'm3u8'
file_url = 'https://streaming-vod.rtp.pt/hls{file_url}/index-v1-a1.m3u8'.format(file_url=file_url)
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
file_url, video_id, 'mp4', 'm3u8_native', file_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=file_key) m3u8_id='hls')
if file_key: elif ext == 'm3u8':
formats.append({ # It can be downloaded without any further changes
'url': 'https://cdn-ondemand.rtp.pt' + file_key, formats = self._extract_m3u8_formats(
'preference': 1, file_url, video_id, 'mp4', 'm3u8_native',
}) m3u8_id='hls')
self._sort_formats(formats)
else: else:
# Need to set basepath
file_url = 'https://cdn-ondemand.rtp.pt{file_url}'.format(file_url=file_url)
formats = [{ formats = [{
'url': file_url, 'url': file_url,
'ext': ext, 'ext': ext,
}] }]
if config.get('mediaType') == 'audio':
if config['mediaType'] == 'audio':
for f in formats: for f in formats:
f['vcodec'] = 'none' f['vcodec'] = 'none'
subtitles = {}
if 'vtt' in config:
sub_lang, sub_lang_full, sub_url = config['vtt'][0]
subtitles.setdefault(sub_lang, []).append({
'url': sub_url,
'ext': 'vtt',
})
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'description': self._html_search_meta(['description', 'twitter:description'], webpage), 'subtitles': subtitles,
'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage), 'description': self._html_search_meta(['og:description', 'description', 'twitter:description'], webpage),
'thumbnail': config['poster'] or self._og_search_thumbnail(webpage),
} }

View File

@ -91,12 +91,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'IOS', 'clientName': 'IOS',
'clientVersion': '19.45.4', 'clientVersion': '20.10.4',
'deviceMake': 'Apple', 'deviceMake': 'Apple',
'deviceModel': 'iPhone16,2', 'deviceModel': 'iPhone16,2',
'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'userAgent': 'com.google.ios.youtube/20.10.4 (iPhone16,2; U; CPU iOS 18_3_2 like Mac OS X;)',
'osName': 'iPhone', 'osName': 'iPhone',
'osVersion': '18.1.0.22B83', 'osVersion': '18.3.2.22D82',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@ -109,7 +109,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'MWEB', 'clientName': 'MWEB',
'clientVersion': '2.20241202.07.00', 'clientVersion': '2.20250311.03.00',
# mweb previously did not require PO Token with this UA # mweb previously did not require PO Token with this UA
'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
}, },
@ -122,7 +122,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'TVHTML5', 'clientName': 'TVHTML5',
'clientVersion': '7.20241201.18.00', 'clientVersion': '7.20250312.16.00',
'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
@ -132,7 +133,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'WEB', 'clientName': 'WEB',
'clientVersion': '2.20241126.01.00', 'clientVersion': '2.20250312.04.00',
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
@ -691,7 +692,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'invidious': '|'.join(_INVIDIOUS_SITES), 'invidious': '|'.join(_INVIDIOUS_SITES),
} }
# Regexes that capture a player identifier (named group `id`) from a
# player JS URL/path.
_PLAYER_INFO_RE = (
    # /s/player/<id>/player... and /s/player/<id>/tv-player...
    # (a single `/` separates the ID from the optional `tv-` prefix)
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/(?:tv-)?player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
@ -1851,12 +1852,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if func_code: if func_code:
return jsi, player_id, func_code return jsi, player_id, func_code
return self._extract_n_function_code_jsi(video_id, jsi, player_id)
func_name = self._extract_n_function_name(jscode) def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
var_ay = self._search_regex(
r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"(?:\\"|[^"])+"\s*\.\s*split\("\W+"\))(?=\s*[,;])',
jsi.code, 'useful values', default='')
func_name = self._extract_n_function_name(jsi.code)
func_code = jsi.extract_function_code(func_name) func_code = jsi.extract_function_code(func_name)
if var_ay:
func_code = (func_code[0], ';\n'.join((var_ay, func_code[1])))
self.cache.store('youtube-nsig', player_id, func_code) if player_id:
self.cache.store('youtube-nsig', player_id, func_code)
return jsi, player_id, func_code return jsi, player_id, func_code
def _extract_n_function_from_code(self, jsi, func_code): def _extract_n_function_from_code(self, jsi, func_code):

View File

@ -1,10 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import calendar
import itertools import itertools
import json import json
import operator import operator
import re import re
import time
from functools import update_wrapper, wraps from functools import update_wrapper, wraps
@ -12,8 +14,10 @@ from .utils import (
error_to_compat_str, error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none,
js_to_json, js_to_json,
remove_quotes, remove_quotes,
str_or_none,
unified_timestamp, unified_timestamp,
variadic, variadic,
write_string, write_string,
@ -150,6 +154,7 @@ def _js_to_primitive(v):
) )
# more exact: yt-dlp/yt-dlp#12110
def _js_toString(v): def _js_toString(v):
return ( return (
'undefined' if v is JS_Undefined 'undefined' if v is JS_Undefined
@ -158,7 +163,7 @@ def _js_toString(v):
else 'null' if v is None else 'null' if v is None
# bool <= int: do this first # bool <= int: do this first
else ('false', 'true')[v] if isinstance(v, bool) else ('false', 'true')[v] if isinstance(v, bool)
else '{0:.7f}'.format(v).rstrip('.0') if isinstance(v, compat_numeric_types) else re.sub(r'(?<=\d)\.?0*$', '', '{0:.7f}'.format(v)) if isinstance(v, compat_numeric_types)
else _js_to_primitive(v)) else _js_to_primitive(v))
@ -404,6 +409,7 @@ class JSInterpreter(object):
class Exception(ExtractorError): class Exception(ExtractorError):
def __init__(self, msg, *args, **kwargs): def __init__(self, msg, *args, **kwargs):
expr = kwargs.pop('expr', None) expr = kwargs.pop('expr', None)
msg = str_or_none(msg, default='"None"')
if expr is not None: if expr is not None:
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr) msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs) super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
@ -431,6 +437,7 @@ class JSInterpreter(object):
flags, _ = self.regex_flags(flags) flags, _ = self.regex_flags(flags)
# First, avoid https://github.com/python/cpython/issues/74534 # First, avoid https://github.com/python/cpython/issues/74534
self.__self = None self.__self = None
pattern_txt = str_or_none(pattern_txt) or '(?:)'
self.__pattern_txt = pattern_txt.replace('[[', r'[\[') self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
self.__flags = flags self.__flags = flags
@ -475,6 +482,73 @@ class JSInterpreter(object):
flags |= cls.RE_FLAGS[ch] flags |= cls.RE_FLAGS[ch]
return flags, expr[idx + 1:] return flags, expr[idx + 1:]
class JS_Date(object):
    """Minimal stand-in for the JS `Date` object.

    The instant is held in `_t` as milliseconds since the Unix epoch, or
    None for an invalid date.
    """

    # milliseconds since the epoch; None means "Invalid Date"
    _t = None

    @staticmethod
    def __ymd_etc(*args, **kw_is_utc):
        """Convert date components to a millisecond timestamp.

        args: year, monthIndex, day, hours, minutes, seconds, milliseconds
        (extra arguments are ignored). Pass `is_utc=True` to interpret the
        components as UTC rather than local time.

        Returns None if there are no components or they can't be converted.
        """
        is_utc = kw_is_utc.get('is_utc', False)

        if not args:
            return None
        # JS defaults for omitted trailing components: monthIndex 0, day 1
        # (not 0, which would select the last day of the previous month),
        # everything else 0
        defaults = (0, 0, 1, 0, 0, 0, 0)
        parts = list(args[:7])
        parts += defaults[len(parts):]
        ms = parts[6]
        # TODO: [MDN] When a segment overflows or underflows its expected
        # range, it usually "carries over to" or "borrows from" the higher segment.
        try:
            # the interpreter may hand over floats: the time functions need ints
            year, month, day, hours, minutes, seconds = (int(p) for p in parts[:6])
            # month 0..11 -> 1..12; wday, yday, isdst = -1: don't know
            # (calendar.timegm() only reads the first six fields)
            tm = time.struct_time(
                (year, month + 1, day, hours, minutes, seconds, -1, -1, -1))
            mktime = calendar.timegm if is_utc else time.mktime
            return mktime(tm) * 1000 + ms
        except (OverflowError, TypeError, ValueError):
            return None

    @classmethod
    def UTC(cls, *args):
        """JS `Date.UTC()`: ms timestamp of the UTC components, or NaN."""
        t = cls.__ymd_etc(*args, is_utc=True)
        return _NaN if t is None else t

    @staticmethod
    def parse(date_str, **kw_is_raw):
        """JS `Date.parse()`: ms timestamp parsed from `date_str`.

        If parsing fails, returns NaN, or None when `is_raw=True` is passed.
        """
        is_raw = kw_is_raw.get('is_raw', False)

        t = unified_timestamp(str_or_none(date_str), False)
        if t is not None:
            return int(t * 1000)
        return None if is_raw else _NaN

    @staticmethod
    def now(**kw_is_raw):
        """JS `Date.now()`: the current time as a ms timestamp.

        `is_raw` is accepted for symmetry with parse() but has no effect,
        since the current time is always available.
        """
        return int(time.time() * 1000)

    def __init__(self, *args):
        """Construct from no args (now), a JS_Date, a date string, a ms
        timestamp, or year, monthIndex[, day, ...] in local time."""
        if not args:
            args = [self.now(is_raw=True)]
        if len(args) == 1:
            if isinstance(args[0], JSInterpreter.JS_Date):
                self._t = int_or_none(args[0].valueOf(), default=None)
            else:
                arg_type = _js_typeof(args[0])
                if arg_type == 'string':
                    self._t = self.parse(args[0], is_raw=True)
                elif arg_type == 'number':
                    try:
                        self._t = int(args[0])
                    except (OverflowError, ValueError):
                        # NaN, +/-Infinity: Invalid Date
                        self._t = None
        else:
            self._t = self.__ymd_etc(*args)

    def toString(self):
        """Return the local date/time as text, or 'Invalid Date'."""
        try:
            # strftime() needs a time tuple, not the raw ms timestamp
            local_tm = time.localtime(self._t / 1000.0)
            return time.strftime('%a %b %d %Y %H:%M:%S %Z%z', local_tm).rstrip()
        except (OSError, OverflowError, TypeError, ValueError):
            return "Invalid Date"

    def valueOf(self):
        """Return the ms timestamp, or NaN for an invalid date."""
        return _NaN if self._t is None else self._t
@classmethod @classmethod
def __op_chars(cls): def __op_chars(cls):
op_chars = set(';,[') op_chars = set(';,[')
@ -599,14 +673,15 @@ class JSInterpreter(object):
except Exception as e: except Exception as e:
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e) raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
def _index(self, obj, idx, allow_undefined=None):
    """Evaluate the JS member access obj[idx].

    A list is indexed with int(idx), and 'length' gives its length;
    anything else is indexed with the string form of idx.

    allow_undefined controls what happens when the lookup fails:
    * None (default): a TypeError (obj can't be indexed at all) is
      raised as self.Exception; any other failure gives JS_Undefined
    * true value: every failure gives JS_Undefined
    * other false value: every failure is raised as self.Exception
    """
    if idx == 'length' and isinstance(obj, list):
        return len(obj)
    try:
        return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
    # int() raises ValueError for a non-numeric key or NaN, and
    # OverflowError for Infinity: treat these as a missing element
    except (TypeError, KeyError, IndexError, ValueError, OverflowError) as e:
        # allow_undefined is None gives correct behaviour
        if allow_undefined or (
                allow_undefined is None and not isinstance(e, TypeError)):
            return JS_Undefined
        raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
@ -715,7 +790,7 @@ class JSInterpreter(object):
new_kw, _, obj = expr.partition('new ') new_kw, _, obj = expr.partition('new ')
if not new_kw: if not new_kw:
for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)), for klass, konstr in (('Date', lambda *x: self.JS_Date(*x).valueOf()),
('RegExp', self.JS_RegExp), ('RegExp', self.JS_RegExp),
('Error', self.Exception)): ('Error', self.Exception)):
if not obj.startswith(klass + '('): if not obj.startswith(klass + '('):
@ -1034,6 +1109,7 @@ class JSInterpreter(object):
'String': compat_str, 'String': compat_str,
'Math': float, 'Math': float,
'Array': list, 'Array': list,
'Date': self.JS_Date,
} }
obj = local_vars.get(variable) obj = local_vars.get(variable)
if obj in (JS_Undefined, None): if obj in (JS_Undefined, None):
@ -1086,6 +1162,8 @@ class JSInterpreter(object):
assertion(len(argvals) == 2, 'takes two arguments') assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1] return argvals[0] ** argvals[1]
raise self.Exception('Unsupported Math method ' + member, expr=expr) raise self.Exception('Unsupported Math method ' + member, expr=expr)
elif obj is self.JS_Date:
return getattr(obj, member)(*argvals)
if member == 'split': if member == 'split':
assertion(len(argvals) <= 2, 'takes at most two arguments') assertion(len(argvals) <= 2, 'takes at most two arguments')