This commit is contained in:
u-n-k-n-o-w-n 2024-12-30 16:35:29 +08:00 committed by GitHub
commit 98e6fccb0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 142 additions and 1 deletions

View File

@ -265,7 +265,10 @@ def t_factory(name, sig_func, url_pattern):
test_id = m.group('id')
def test_func(self):
basename = 'player-{0}-{1}.js'.format(name, test_id)
tn = name
if name.endswith('_wd'):
tn = name[:-3]
basename = 'player-{0}-{1}.js'.format(tn, test_id)
fn = os.path.join(self.TESTDATA_DIR, basename)
if not os.path.exists(fn):
@ -293,6 +296,10 @@ def n_sig(jscode, sig_input):
funcname, sig_input, _ytdl_do_not_return=sig_input)
def n_sig_wd(jscode, sig_input):
    """Run the n-function from *jscode* on *sig_input* via the 'chrome' WebDriver backend."""
    extractor = YoutubeIE(FakeYDL())
    return extractor._call_n_function_with_webdriver('chrome', jscode, sig_input)
make_sig_test = t_factory(
'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
for test_spec in _SIG_TESTS:
@ -303,6 +310,17 @@ make_nsig_test = t_factory(
for test_spec in _NSIG_TESTS:
make_nsig_test(*test_spec)
# Opt-in WebDriver variants of the nsig tests: they are only generated when
# the private `--test_wd` flag is present on the command line.
test_wd = '--test_wd' in sys.argv
if test_wd:
    # Strip the private flag before unittest.main() gets to parse sys.argv
    sys.argv = [arg for arg in sys.argv if arg != '--test_wd']
    # NB: the dot before `js` is escaped so that only a literal `.js` suffix matches
    make_nsig_wd_test = t_factory(
        'nsig_wd', n_sig_wd, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+\.js$'))
    for test_spec in _NSIG_TESTS:
        make_nsig_wd_test(*test_spec)
if __name__ == '__main__':
unittest.main()

View File

@ -419,6 +419,7 @@ def _real_main(argv=None):
'call_home': opts.call_home,
'sleep_interval': opts.sleep_interval,
'max_sleep_interval': opts.max_sleep_interval,
'webdriver': opts.webdriver,
'external_downloader': opts.external_downloader,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,

View File

@ -2448,6 +2448,11 @@ try:
except ImportError:
import BaseHTTPServer as compat_http_server
try:
from urllib.parse import quote as compat_urllib_quote
except ImportError: # Python 2
from urllib import quote as compat_urllib_quote
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
@ -3560,6 +3565,7 @@ __all__ = [
'compat_tokenize_tokenize',
'compat_urllib_error',
'compat_urllib_parse',
'compat_urllib_quote',
'compat_urllib_request',
'compat_urllib_request_DataHandler',
'compat_urllib_response',

View File

@ -11,6 +11,7 @@ import random
import re
import time
import traceback
import importlib
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
@ -21,11 +22,13 @@ from ..compat import (
compat_urllib_parse,
compat_urllib_parse_parse_qs as compat_parse_qs,
compat_urllib_parse_unquote_plus,
compat_urllib_quote,
compat_urllib_parse_urlparse,
compat_zip as zip,
)
from ..jsinterp import JSInterpreter
from ..utils import (
check_executable,
clean_html,
dict_get,
error_to_compat_str,
@ -1493,6 +1496,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
super(YoutubeIE, self).__init__(*args, **kwargs)
self._code_cache = {}
self._player_cache = {}
self._webdriver_wrapper = None
# *ytcfgs, webpage=None
def _extract_player_url(self, *ytcfgs, **kw_webpage):
@ -1669,6 +1673,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_url is None:
raise ExtractorError('Cannot decrypt nsig without player_url')
webdriver_type = self._downloader.params.get('webdriver', None)
if webdriver_type is not None:
try:
jscode = self._load_player(video_id, player_url)
ret = self._call_n_function_with_webdriver(webdriver_type, jscode, n)
except Exception as e:
self.report_warning(
'%s (%s %s)' % (
'Unable to decode n-parameter: download likely to be throttled',
error_to_compat_str(e),
traceback.format_exc()),
video_id=video_id)
return
self.write_debug('Decrypted nsig(with webdriver) {0} => {1}'.format(n, ret))
return ret
try:
jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
except ExtractorError as e:
@ -1692,6 +1712,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug('Decrypted nsig {0} => {1}'.format(n, ret))
return ret
def _call_n_function_with_webdriver(self, webdriver_type, jscode, n_param):
    """Evaluate the player's n-function on n_param inside a WebDriver browser.

    The WebDriverJSWrapper is created on first use (and pointed at
    about:blank), then cached on the instance for later calls.

    webdriver_type -- browser type handed to WebDriverJSWrapper
    jscode         -- player JS source that defines the n-function
    n_param        -- n-parameter value to transform (expected to be a string)

    Returns whatever the injected function returns in the browser.
    Exceptions raised by WebDriverJSWrapper propagate to the caller.
    """
    import json  # local import: only needed to quote n_param for JS

    if self._webdriver_wrapper is None:
        self._webdriver_wrapper = WebDriverJSWrapper(webdriver_type)
        self._webdriver_wrapper.get('about:blank')
    funcname = self._extract_n_function_name(jscode)
    # Random global name under which the n-function gets exposed; presumably
    # random so that it cannot collide with names defined by earlier calls
    alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    dummyfunc = ''.join(random.choice(alphabet) for _ in range(8))
    # In the browser: decode the percent-encoded player source, splice
    # `;<dummyfunc> = <funcname>;` in front of its last `}`, declare
    # <dummyfunc> as a global, run the result as a <script> element and
    # finally call <dummyfunc> with the n-parameter.
    # n_param is emitted as a JSON string literal rather than being pasted
    # between quotes, so quotes/backslashes in it cannot break out of the
    # string and alter the generated script.
    script = ('return ((e) => {{'
              'const d = decodeURIComponent(e);'
              'const p = d.lastIndexOf("}}");'
              'const th = d.substring(0, p);'
              'const bh = d.substring(p);'
              'const m = "var {0};" + th + ";{0} = {1};" + bh;'
              'const s = document.createElement("script");'
              's.innerHTML = m;'
              'document.body.append(s);'
              'return {0}({2});'
              '}})("{3}");').format(
                  dummyfunc, funcname, json.dumps(n_param), compat_urllib_quote(jscode))
    return self._webdriver_wrapper.executeJS(script)
def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex(
# (y=NuD(),Mw(k),q=k.Z[y]||null)&&(q=narray[idx](q),k.set(y,q),k.V||NuD(''))}};
@ -3916,3 +3957,75 @@ class YoutubeTruncatedIDIE(InfoExtractor):
raise ExtractorError(
'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
expected=True)
class WebDriverJSWrapper(object):
    """Thin wrapper around a Selenium WebDriver session used to execute JS.

    The constructor imports `selenium.webdriver` lazily, checks that the
    matching driver executable can be run, and starts the browser (headless
    for firefox, chrome and edge). A missing Selenium module, a missing
    driver executable, an unsupported type or a failing script are reported
    as ExtractorError; other Selenium errors propagate unchanged.
    """

    def __init__(self, webdriver_type, pageload_timeout=10, script_timeout=5):
        """Start a browser session.

        webdriver_type   -- 'firefox', 'chrome', 'edge' or 'safari'
        pageload_timeout -- passed to WebDriver.set_page_load_timeout()
        script_timeout   -- passed to WebDriver.set_script_timeout()
        """
        # Set first so that __del__ is safe even if start-up fails below
        self._webdriver = None
        try:
            wd = importlib.import_module('selenium.webdriver')
        except ImportError as e:
            self._raise_exception('Failed to import module "selenium.webdriver"', cause=e)
        # Headless mode is requested through explicit browser arguments: the
        # `Options.headless` setter was deprecated and later removed from
        # Selenium 4, where assigning it would silently do nothing.
        if webdriver_type == 'firefox':  # geckodriver
            self._require_driver('geckodriver')
            o = wd.FirefoxOptions()
            o.add_argument('-headless')
            s = wd.firefox.service.Service(log_path=os.path.devnull)
            self._webdriver = wd.Firefox(options=o, service=s)
        elif webdriver_type == 'chrome':  # chromedriver
            self._require_driver('chromedriver')
            o = wd.ChromeOptions()
            o.add_argument('--headless')
            # If you are using the snap version of chromium, chromedriver is
            # included in the snap package. You should use that driver.
            #   $ cd /snap/bin && sudo ln -s -T chromium.chromedriver chromedriver
            # or
            #   s = wd.chrome.service.Service(executable_path='chromium.chromedriver')
            #   self._webdriver = wd.Chrome(options=o, service=s)
            self._webdriver = wd.Chrome(options=o)
        elif webdriver_type == 'edge':  # msedgedriver
            self._require_driver('msedgedriver')
            o = wd.EdgeOptions()
            o.add_argument('--headless')
            self._webdriver = wd.Edge(options=o)
        elif webdriver_type == 'safari':  # safaridriver
            self._require_driver('safaridriver')
            # safaridriver does not have headless-mode. :(
            # But macOS includes safaridriver by default.
            # To enable automation on safaridriver, run the following command
            # once from the admin terminal.
            #   # safaridriver --enable
            self._webdriver = wd.Safari()
        else:
            self._raise_exception('unsupported type: %s' % (webdriver_type))
        self._webdriver.set_page_load_timeout(pageload_timeout)
        self._webdriver.set_script_timeout(script_timeout)

    def __del__(self):
        """Quit the browser session, if one was started."""
        if self._webdriver is not None:
            self._webdriver.quit()

    def _require_driver(self, exe):
        """Raise ExtractorError unless `<exe> --version` can be executed."""
        if not check_executable(exe, ['--version']):
            self._raise_exception('%s not found in PATH' % exe)

    def _raise_exception(self, msg, cause=None):
        """Raise an ExtractorError whose message is tagged with this class' name."""
        raise ExtractorError('[WebDriverJSWrapper] %s' % (msg), cause=cause)

    def get(self, url):
        """Loads a web page in the current browser session"""
        self._webdriver.get(url)

    def executeJS(self, jscode):
        """Execute JS and return value

        Any exception raised while executing the script is re-raised as an
        ExtractorError carrying the original exception as its cause.
        """
        try:
            return self._webdriver.execute_script(jscode)
        except Exception as e:
            self._raise_exception('Failed to execute JS', cause=e)

View File

@ -576,6 +576,9 @@ def parseOpts(overrideArguments=None):
'Upper bound of a range for randomized sleep before each download '
'(maximum possible number of seconds to sleep). Must only be used '
'along with --min-sleep-interval.'))
workarounds.add_option(
'--webdriver', metavar='TYPE', dest='webdriver', default=None,
help='Specify webdriver type when you want to use Selenium to execute YouTube\'s "n_function" in order to avoid throttling: "firefox", "chrome", "edge", or "safari"')
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
verbosity.add_option(