Handle user:pass in URLs

Fixes "nonnumeric port" errors when youtube-dl is given URLs with
usernames and passwords such as:

    http://username:password@example.com/myvideo.mp4

Refs:
- https://en.wikipedia.org/wiki/Basic_access_authentication
- https://tools.ietf.org/html/rfc1738#section-3.1
- https://docs.python.org/3.8/library/urllib.parse.html#urllib.parse.urlsplit

Fixes #18276 (point 4)
Fixes #20258
Fixes #26211 (see comment)
This commit is contained in:
Hubert Hirtz 2021-04-19 14:07:45 +02:00
parent a803582717
commit 18502b877f
2 changed files with 47 additions and 1 deletions

View File

@ -65,6 +65,8 @@ from youtube_dl.utils import (
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitize_url, sanitize_url,
extract_user_pass,
sanitized_Request,
expand_path, expand_path,
prepend_extension, prepend_extension,
replace_extension, replace_extension,
@ -237,6 +239,26 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar') self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
def test_extract_user_pass(self):
    """extract_user_pass() returns (url without credentials, user, password)."""
    # (input URL, expected (stripped URL, username, password))
    cases = (
        ('http://foo.bar', ('http://foo.bar', None, None)),
        ('http://:foo.bar', ('http://:foo.bar', None, None)),
        ('http://@foo.bar', ('http://foo.bar', '', '')),
        ('http://:pass@foo.bar', ('http://foo.bar', '', 'pass')),
        ('http://user:@foo.bar', ('http://foo.bar', 'user', '')),
        ('http://user:pass@foo.bar', ('http://foo.bar', 'user', 'pass')),
    )
    for given_url, expected in cases:
        self.assertEqual(extract_user_pass(given_url), expected)
def test_sanitized_Request(self):
    """sanitized_Request() turns URL credentials into a Basic auth header."""
    # URLs without an '@' userinfo part must not produce an Authorization header.
    for plain_url in ('http://foo.bar', 'http://:foo.bar'):
        self.assertFalse(sanitized_Request(plain_url).has_header('Authorization'))

    # (input URL, expected Authorization header value)
    with_credentials = (
        ('http://@foo.bar', 'Basic Og=='),
        ('http://:pass@foo.bar', 'Basic OnBhc3M='),
        ('http://user:@foo.bar', 'Basic dXNlcjo='),
        ('http://user:pass@foo.bar', 'Basic dXNlcjpwYXNz'),
    )
    for auth_url, expected_header in with_credentials:
        request = sanitized_Request(auth_url)
        self.assertEqual(request.get_header('Authorization'), expected_header)
def test_expand_path(self): def test_expand_path(self):
def env(var): def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)

View File

@ -2154,8 +2154,32 @@ def sanitize_url(url):
return url return url
def extract_user_pass(url):
    """Split ``user:pass@`` credentials out of a URL.

    Returns a ``(url, username, password)`` tuple.

    When the network location of *url* contains no ``@`` userinfo part, *url*
    is returned unchanged and both username and password are ``None``.
    Otherwise the returned URL has the userinfo removed, username is the
    (possibly empty) text before the first ``:`` of the userinfo, and password
    is the text after it, or ``''`` when no password was given.
    """
    parts = compat_urlparse.urlsplit(url)
    username = parts.username
    if username is None:
        # No userinfo present: hand the URL back untouched.
        return url, None, None

    password = parts.password
    if password is None:
        password = ''

    # Strip the userinfo from the original netloc instead of rebuilding it
    # from parts.hostname / parts.port: hostname is lower-cased and loses the
    # brackets of IPv6 literals, and port is an int (not concatenable to str).
    host_and_port = parts.netloc.rpartition('@')[2]
    url = compat_urlparse.urlunsplit(parts._replace(netloc=host_and_port))
    return url, username, password
def sanitized_Request(url, *args, **kwargs):
    """Build a ``compat_urllib_request.Request`` for a sanitized URL.

    *url* is passed through ``sanitize_url()``; any ``user:pass@`` credentials
    embedded in it are removed from the URL and sent as an HTTP Basic
    ``Authorization`` header instead. Remaining positional and keyword
    arguments are forwarded to ``Request`` unchanged, except that the headers
    mapping is copied before the header is added so the caller's dict is
    never modified. Credentials in the URL replace any ``Authorization``
    entry already present in the supplied headers.
    """
    url = sanitize_url(url)
    url, username, password = extract_user_pass(url)
    if username is not None:
        # extract_user_pass() guarantees password is a string when username is set
        auth_payload = username + ':' + password
        auth_payload = base64.b64encode(auth_payload.encode('utf-8')).decode('utf-8')
        auth_header = 'Basic ' + auth_payload
        if len(args) >= 2:
            # Request(url, data, headers, ...): args[1] is the headers mapping
            headers = dict(args[1] or {})
            args = args[:1] + (headers,) + args[2:]
        else:
            headers = dict(kwargs.get('headers') or {})
            kwargs['headers'] = headers
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
def expand_path(s): def expand_path(s):