diff --git a/README.md b/README.md index 6e07ddb1c..92de72993 100644 --- a/README.md +++ b/README.md @@ -276,6 +276,13 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo info-json" option) --cookies FILE File to read cookies from and dump cookie jar in + --cookies-from-browser BROWSER Load cookies from a user profile of the + given web browser: brave, chrome, + chromium, edgefirefox, opera, safari, + vivaldi. You can specify an alternative + user profile name or directory using + "BROWSER:PROFILE_NAME" or + "BROWSER:PROFILE_PATH" --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default @@ -916,14 +923,19 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op ### How do I pass cookies to youtube-dl? -Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. - -In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). - -Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. +If you have access to a web browser, simply log into youtube in the browser and pass `--cookies-from-browser ` to youtube-dl (see `youtube-dl --help` for supported browsers). Passing cookies to youtube-dl is a good way to workaround login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). +#### Passing cookies without a web browser +Warning: be careful with your cookies file as it can be used to gain access to any accounts you are logged into! + +If you do not have access to a web browser (e.g. if running headless) then manually extract cookies to a file from a web browser on another machine. There are browser extensions that can do this such as [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format. + +After you have extracted a cookies file, pass it to youtube-dl using the option: `--cookies ` + + ### How do I stream directly to media player? You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe former to latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with: diff --git a/test/test_cookies.py b/test/test_cookies.py new file mode 100644 index 000000000..c3365bbcc --- /dev/null +++ b/test/test_cookies.py @@ -0,0 +1,106 @@ +import unittest +from datetime import datetime, timezone + +from youtube_dl import cookies +from youtube_dl.cookies import ( + LinuxChromeCookieDecryptor, + WindowsChromeCookieDecryptor, + CRYPTO_AVAILABLE, + MacChromeCookieDecryptor, + parse_safari_cookies, + pbkdf2_sha1, + PBKDF2_AVAILABLE, + YDLLogger, +) + + +class MonkeyPatch: + def __init__(self, module, temporary_values): + self._module = module + self._temporary_values = temporary_values + self._backup_values = {} + + def __enter__(self): + for name, temp_value in self._temporary_values.items(): + self._backup_values[name] = getattr(self._module, name) + setattr(self._module, name, temp_value) + + def __exit__(self, exc_type, exc_val, exc_tb): + for name, backup_value in self._backup_values.items(): + setattr(self._module, name, backup_value) + + +class TestCookies(unittest.TestCase): + @unittest.skipIf(not PBKDF2_AVAILABLE, 'PBKDF2 not available') + def test_chrome_cookie_decryptor_linux_derive_key(self): + key = LinuxChromeCookieDecryptor(None, YDLLogger()).derive_key(b'abc') + self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17') + + @unittest.skipIf(not PBKDF2_AVAILABLE, 'PBKDF2 not available') + def test_chrome_cookie_decryptor_mac_derive_key(self): + key = MacChromeCookieDecryptor(None, YDLLogger()).derive_key(b'abc') + self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY') + + @unittest.skipIf(not PBKDF2_AVAILABLE, 'PBKDF2 not available') + def test_chrome_cookie_decryptor_linux_v10(self): + with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}): + encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6' + value = 'USD' + decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger()) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + + @unittest.skipIf(not PBKDF2_AVAILABLE, 'PBKDF2 not available') + def test_chrome_cookie_decryptor_linux_v11(self): + with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b'', + 'KEYRING_AVAILABLE': True}): + encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd' + value = 'tz=Europe.London' + decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger()) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + + @unittest.skipIf(not PBKDF2_AVAILABLE, 'PBKDF2 not available') + @unittest.skipIf(not CRYPTO_AVAILABLE, 'cryptography library not available') + def test_chrome_cookie_decryptor_windows_v10(self): + with MonkeyPatch(cookies, { + '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2\xc3A\x00\x00\x80\xc3\x07:\xc3A' \ + b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \ + b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \ + b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(' + + jar = parse_safari_cookies(cookies) + self.assertEqual(len(jar), 1) + cookie = list(jar)[0] + self.assertEqual(cookie.domain, 'localhost') + self.assertEqual(cookie.port, None) + self.assertEqual(cookie.path, '/') + self.assertEqual(cookie.name, 'foo') + self.assertEqual(cookie.value, 'test%20%3Bcookie') + self.assertFalse(cookie.secure) + expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc) + self.assertEqual(cookie.expires, int(expected_expiration.timestamp())) + + def test_pbkdf2_sha1(self): + key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16) + if PBKDF2_AVAILABLE: + self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34') + else: + self.assertIsNone(key) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8e8546596..47c703715 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -30,7 +30,6 @@ from string import ascii_letters from .compat import ( compat_basestring, - compat_cookiejar, compat_get_terminal_size, compat_http_client, compat_kwargs, @@ -42,6 +41,7 @@ from .compat import ( compat_urllib_request, compat_urllib_request_DataHandler, ) +from .cookies import load_cookies from .utils import ( age_restricted, args_to_str, @@ -90,7 +90,6 @@ from .utils import ( version_tuple, write_json_file, write_string, - YoutubeDLCookieJar, YoutubeDLCookieProcessor, YoutubeDLHandler, YoutubeDLRedirectHandler, @@ -211,6 +210,9 @@ class YoutubeDL(object): Videos already present in the file are not downloaded again. cookiefile: File name where cookies should be read from and dumped to. + cookiesfrombrowser:A tuple containing the name of the browser and the profile + name/path from where cookies are loaded. + Eg: ('chrome', ) or (vivaldi, 'default') nocheckcertificate:Do not verify SSL certificates prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. @@ -2379,16 +2381,11 @@ class YoutubeDL(object): timeout_val = self.params.get('socket_timeout') self._socket_timeout = 600 if timeout_val is None else float(timeout_val) + opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') opts_cookiefile = self.params.get('cookiefile') opts_proxy = self.params.get('proxy') - if opts_cookiefile is None: - self.cookiejar = compat_cookiejar.CookieJar() - else: - opts_cookiefile = expand_path(opts_cookiefile) - self.cookiejar = YoutubeDLCookieJar(opts_cookiefile) - if os.access(opts_cookiefile, os.R_OK): - self.cookiejar.load(ignore_discard=True, ignore_expires=True) + self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self) cookie_processor = YoutubeDLCookieProcessor(self.cookiejar) if opts_proxy is not None: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e1bd67919..aa15d1171 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -11,7 +11,7 @@ import os import random import sys - +from .cookies import SUPPORTED_BROWSERS from .options import ( parseOpts, ) @@ -216,6 +216,12 @@ def _real_main(argv=None): if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']: parser.error('invalid subtitle format specified') + if opts.cookiesfrombrowser is not None: + opts.cookiesfrombrowser = [ + part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)] + if opts.cookiesfrombrowser[0] not in SUPPORTED_BROWSERS: + parser.error('unsupported browser specified for cookies') + if opts.date is not None: date = DateRange.day(opts.date) else: @@ -397,6 +403,7 @@ def _real_main(argv=None): 'age_limit': opts.age_limit, 'download_archive': download_archive_fn, 'cookiefile': opts.cookiefile, + 'cookiesfrombrowser': opts.cookiesfrombrowser, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, 'proxy': opts.proxy, diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 28942a8c1..9e4531d46 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -19,6 +19,7 @@ import socket import struct import subprocess import sys +import tempfile import xml.etree.ElementTree # deal with critical unicode/str things first @@ -2925,6 +2926,7 @@ except TypeError: # Python 2.6 yield n n += step + if sys.version_info >= (3, 0): from tokenize import tokenize as compat_tokenize_tokenize else: @@ -2982,6 +2984,23 @@ except ImportError: except ImportError: compat_filter = filter +try: + from tempfile import TemporaryDirectory as compat_TemporaryDirectory +except ImportError: + class compat_TemporaryDirectory: + def __init__(self, prefix=None): + self._prefix = prefix + self._path = None + + def __enter__(self): + self._path = tempfile.mkdtemp(prefix=self._prefix) + return self._path + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._path is not None: + shutil.rmtree(self._path) + + try: from future_builtins import zip as compat_zip except ImportError: # not 2.6+ or is 3.x @@ -3099,6 +3118,7 @@ __all__ = [ 'compat_HTMLParser', 'compat_HTTPError', 'compat_Struct', + 'compat_TemporaryDirectory', 'compat_b64decode', 'compat_basestring', 'compat_casefold', diff --git a/youtube_dl/cookies.py b/youtube_dl/cookies.py new file mode 100644 index 000000000..22819e7c6 --- /dev/null +++ b/youtube_dl/cookies.py @@ -0,0 +1,779 @@ +import ctypes +import json +import os +import shutil +import struct +import subprocess +import sys +from datetime import datetime, timedelta, timezone + +from youtube_dl.aes import aes_cbc_decrypt +from youtube_dl.compat import ( + compat_cookiejar_Cookie, + compat_b64decode, + compat_TemporaryDirectory, +) +from youtube_dl.utils import ( + YoutubeDLCookieJar, + expand_path, + bytes_to_intlist, + intlist_to_bytes, + bug_reports_message, +) + +try: + import sqlite3 + SQLITE_AVAILABLE = True +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile python without + # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 + SQLITE_AVAILABLE = False + + +try: + from Crypto.Cipher import AES + CRYPTO_AVAILABLE = True +except ImportError: + CRYPTO_AVAILABLE = False + +try: + import keyring + KEYRING_AVAILABLE = True + KEYRING_UNAVAILABLE_REASON = 'due to unknown reasons ' + bug_reports_message() +except ImportError: + KEYRING_AVAILABLE = False + KEYRING_UNAVAILABLE_REASON = ( + 'as the `keyring` module is not installed. ' + 'Please install by running `python -m pip install keyring`. ' + 'Depending on your platform, additional packages may be required ' + 'to access the keyring; see https://pypi.org/project/keyring') +except Exception as _err: + KEYRING_AVAILABLE = False + KEYRING_UNAVAILABLE_REASON = 'as the `keyring` module could not be initialized: {}'.format(_err) + + +CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} +SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} + + +class YDLLogger: + def __init__(self, ydl=None): + self._ydl = ydl + self._reported_warnings = set() + + def debug(self, message): + if self._ydl: + self._ydl.to_screen('[debug] ' + message) + + def info(self, message): + if self._ydl: + self._ydl.to_screen('[Cookies] ' + message) + + def warning(self, message, only_once=False): + if self._ydl: + if only_once: + if message in self._reported_warnings: + return + else: + self._reported_warnings.add(message) + self._ydl.to_stderr(message) + + def error(self, message): + if self._ydl: + self._ydl.to_stderr(message) + + +def load_cookies(cookie_file, browser_specification, ydl): + cookie_jars = [] + if browser_specification is not None: + browser_name, profile = _parse_browser_specification(*browser_specification) + cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl))) + + if cookie_file is not None: + cookie_file = expand_path(cookie_file) + jar = YoutubeDLCookieJar(cookie_file) + if os.access(cookie_file, os.R_OK): + jar.load(ignore_discard=True, ignore_expires=True) + cookie_jars.append(jar) + + return _merge_cookie_jars(cookie_jars) + + +def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger()): + if browser_name == 'firefox': + return _extract_firefox_cookies(profile, logger) + elif browser_name == 'safari': + return _extract_safari_cookies(profile, logger) + elif browser_name in CHROMIUM_BASED_BROWSERS: + return _extract_chrome_cookies(browser_name, profile, logger) + else: + raise ValueError('unknown browser: {}'.format(browser_name)) + + +def _extract_firefox_cookies(profile, logger): + logger.info('Extracting cookies from firefox') + if not SQLITE_AVAILABLE: + logger.warning('Cannot extract cookies from firefox without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support') + return YoutubeDLCookieJar() + + if profile is None: + search_root = _firefox_browser_dir() + elif _is_path(profile): + search_root = profile + else: + search_root = os.path.join(_firefox_browser_dir(), profile) + + cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite') + if cookie_database_path is None: + raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) + logger.debug('extracting from: "{}"'.format(cookie_database_path)) + + with compat_TemporaryDirectory(prefix='youtube_dl') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') + jar = YoutubeDLCookieJar() + for host, name, value, path, expiry, is_secure in cursor.fetchall(): + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + logger.info('Extracted {} cookies from firefox'.format(len(jar))) + return jar + finally: + if cursor is not None: + cursor.connection.close() + + +def _firefox_browser_dir(): + if sys.platform in ('linux', 'linux2'): + return os.path.expanduser('~/.mozilla/firefox') + elif sys.platform == 'win32': + return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles') + elif sys.platform == 'darwin': + return os.path.expanduser('~/Library/Application Support/Firefox') + else: + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + +def _get_chromium_based_browser_settings(browser_name): + # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md + if sys.platform in ('linux', 'linux2'): + config = _config_home() + browser_dir = { + 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(config, 'google-chrome'), + 'chromium': os.path.join(config, 'chromium'), + 'edge': os.path.join(config, 'microsoft-edge'), + 'opera': os.path.join(config, 'opera'), + 'vivaldi': os.path.join(config, 'vivaldi'), + }[browser_name] + + elif sys.platform == 'win32': + appdata_local = os.path.expandvars('%LOCALAPPDATA%') + appdata_roaming = os.path.expandvars('%APPDATA%') + browser_dir = { + 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'), + 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, r'Chromium\User Data'), + 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'), + }[browser_name] + + elif sys.platform == 'darwin': + appdata = os.path.expanduser('~/Library/Application Support') + browser_dir = { + 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'), + 'chrome': os.path.join(appdata, 'Google/Chrome'), + 'chromium': os.path.join(appdata, 'Chromium'), + 'edge': os.path.join(appdata, 'Microsoft Edge'), + 'opera': os.path.join(appdata, 'com.operasoftware.Opera'), + 'vivaldi': os.path.join(appdata, 'Vivaldi'), + }[browser_name] + + else: + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + keyring_name = { + 'brave': 'Brave', + 'chrome': 'Chrome', + 'chromium': 'Chromium', + 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', + 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', + 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', + }[browser_name] + + browsers_without_profiles = {'opera'} + + return { + 'browser_dir': browser_dir, + 'keyring_name': keyring_name, + 'supports_profiles': browser_name not in browsers_without_profiles + } + + +def _extract_chrome_cookies(browser_name, profile, logger): + logger.info('Extracting cookies from {}'.format(browser_name)) + + if not SQLITE_AVAILABLE: + logger.warning(('Cannot extract cookies from {} without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support').format(browser_name)) + return YoutubeDLCookieJar() + + config = _get_chromium_based_browser_settings(browser_name) + + if profile is None: + search_root = config['browser_dir'] + elif _is_path(profile): + search_root = profile + config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile + else: + if config['supports_profiles']: + search_root = os.path.join(config['browser_dir'], profile) + else: + logger.error('{} does not support profiles'.format(browser_name)) + search_root = config['browser_dir'] + + cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies') + if cookie_database_path is None: + raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) + logger.debug('extracting from: "{}"'.format(cookie_database_path)) + + decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger) + + with compat_TemporaryDirectory(prefix='youtube_dl') as tmpdir: + cursor = None + try: + cursor = _open_database_copy(cookie_database_path, tmpdir) + cursor.connection.text_factory = bytes + column_names = _get_column_names(cursor, 'cookies') + secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' + cursor.execute('SELECT host_key, name, value, encrypted_value, path, ' + 'expires_utc, {} FROM cookies'.format(secure_column)) + jar = YoutubeDLCookieJar() + failed_cookies = 0 + for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall(): + host_key = host_key.decode('utf-8') + name = name.decode('utf-8') + value = value.decode('utf-8') + path = path.decode('utf-8') + + if not value and encrypted_value: + value = decryptor.decrypt(encrypted_value) + if value is None: + failed_cookies += 1 + continue + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + if failed_cookies > 0: + failed_message = ' ({} could not be decrypted)'.format(failed_cookies) + else: + failed_message = '' + logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message)) + return jar + finally: + if cursor is not None: + cursor.connection.close() + + +class ChromeCookieDecryptor: + """ + Overview: + + Linux: + - cookies are either v10 or v11 + - v10: AES-CBC encrypted with a fixed key + - v11: AES-CBC encrypted with an OS protected key (keyring) + - v11 keys can be stored in various places depending on the activate desktop environment [2] + + Mac: + - cookies are either v10 or not v10 + - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux + - not v10: 'old data' stored as plaintext + + Windows: + - cookies are either v10 or not v10 + - v10: AES-GCM encrypted with a key which is encrypted with DPAPI + - not v10: encrypted with DPAPI + + Sources: + - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ + - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_linux.cc + - KeyStorageLinux::CreateService + """ + + def decrypt(self, encrypted_value): + raise NotImplementedError + + +def get_cookie_decryptor(browser_root, browser_keyring_name, logger): + if sys.platform in ('linux', 'linux2'): + return LinuxChromeCookieDecryptor(browser_keyring_name, logger) + elif sys.platform == 'darwin': + return MacChromeCookieDecryptor(browser_keyring_name, logger) + elif sys.platform == 'win32': + return WindowsChromeCookieDecryptor(browser_root, logger) + else: + raise NotImplementedError('Chrome cookie decryption is not supported ' + 'on this platform: {}'.format(sys.platform)) + + +class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger): + self._logger = logger + self._v10_key = self.derive_key(b'peanuts') + if KEYRING_AVAILABLE and browser_keyring_name is not None: + self._v11_key = self.derive_key(_get_linux_keyring_password(browser_keyring_name)) + else: + self._v11_key = None + + def derive_key(self, password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16, logger=self._logger) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + + elif version == b'v11': + if self._v11_key is None: + self._logger.warning('cannot decrypt cookie {}'.format(KEYRING_UNAVAILABLE_REASON), only_once=True) + return None + return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger) + + else: + return None + + +class MacChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_keyring_name, logger): + self._logger = logger + if browser_keyring_name is not None: + password = _get_mac_keyring_password(browser_keyring_name, logger) + self._v10_key = None if password is None else self.derive_key(password) + else: + self._v10_key = None + + def derive_key(self, password): + # values from + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16, logger=self._logger) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + if self._v10_key is None: + self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) + return None + + return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger) + + else: + # other prefixes are considered 'old data' which were stored as plaintext + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm + return encrypted_value + + +class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): + def __init__(self, browser_root, logger): + self._logger = logger + self._v10_key = _get_windows_v10_key(browser_root, logger) + + def decrypt(self, encrypted_value): + version = encrypted_value[:3] + ciphertext = encrypted_value[3:] + + if version == b'v10': + if self._v10_key is None: + self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) + return None + elif not CRYPTO_AVAILABLE: + self._logger.warning('cannot decrypt cookie as the `pycryptodome` module is not installed. ' + 'Please install by running `python -m pip install pycryptodome`', + only_once=True) + return None + + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc + # kNonceLength + nonce_length = 96 // 8 + # boringssl + # EVP_AEAD_AES_GCM_TAG_LEN + authentication_tag_length = 16 + + raw_ciphertext = ciphertext + nonce = raw_ciphertext[:nonce_length] + ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] + authentication_tag = raw_ciphertext[-authentication_tag_length:] + + return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) + + else: + # any other prefix means the data is DPAPI encrypted + # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc + return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8') + + +def _extract_safari_cookies(profile, logger): + if profile is not None: + logger.error('safari does not support profiles') + if sys.platform != 'darwin': + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') + + if not os.path.isfile(cookies_path): + raise FileNotFoundError('could not find safari cookies database') + + with open(cookies_path, 'rb') as f: + cookies_data = f.read() + + jar = parse_safari_cookies(cookies_data, logger=logger) + logger.info('Extracted {} cookies from safari'.format(len(jar))) + return jar + + +class ParserError(Exception): + pass + + +class DataParser: + def __init__(self, data, logger): + self._data = data + self.cursor = 0 + self._logger = logger + + def read_bytes(self, num_bytes): + if num_bytes < 0: + raise ParserError('invalid read of {} bytes'.format(num_bytes)) + end = self.cursor + num_bytes + if end > len(self._data): + raise ParserError('reached end of input') + data = self._data[self.cursor:end] + self.cursor = end + return data + + def expect_bytes(self, expected_value, message): + value = self.read_bytes(len(expected_value)) + if value != expected_value: + raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message)) + + def read_uint(self, big_endian=False): + data_format = '>I' if big_endian else ' 0: + self._logger.debug('skipping {} bytes ({}): {}'.format( + num_bytes, description, self.read_bytes(num_bytes))) + elif num_bytes < 0: + raise ParserError('invalid skip of {} bytes'.format(num_bytes)) + + def skip_to(self, offset, description='unknown'): + self.skip(offset - self.cursor, description) + + def skip_to_end(self, description='unknown'): + self.skip_to(len(self._data), description) + + +def _mac_absolute_time_to_posix(timestamp): + return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp()) + + +def _parse_safari_cookies_header(data, logger): + p = DataParser(data, logger) + p.expect_bytes(b'cook', 'database signature') + number_of_pages = p.read_uint(big_endian=True) + page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)] + return page_sizes, p.cursor + + +def _parse_safari_cookies_page(data, jar, logger): + p = DataParser(data, logger) + p.expect_bytes(b'\x00\x00\x01\x00', 'page signature') + number_of_cookies = p.read_uint() + record_offsets = [p.read_uint() for _ in range(number_of_cookies)] + if number_of_cookies == 0: + logger.debug('a cookies page of size {} has no cookies'.format(len(data))) + return + + p.skip_to(record_offsets[0], 'unknown page header field') + + for record_offset in record_offsets: + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) + p.skip_to_end('space in between pages') + + +def _parse_safari_cookies_record(data, jar, logger): + p = DataParser(data, logger) + record_size = p.read_uint() + p.skip(4, 'unknown record field 1') + flags = p.read_uint() + is_secure = bool(flags & 0x0001) + p.skip(4, 'unknown record field 2') + domain_offset = p.read_uint() + name_offset = p.read_uint() + path_offset = p.read_uint() + value_offset = p.read_uint() + p.skip(8, 'unknown record field 3') + expiration_date = _mac_absolute_time_to_posix(p.read_double()) + _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841 + + try: + p.skip_to(domain_offset) + domain = p.read_cstring() + + p.skip_to(name_offset) + name = p.read_cstring() + + p.skip_to(path_offset) + path = p.read_cstring() + + p.skip_to(value_offset) + value = p.read_cstring() + except UnicodeDecodeError: + logger.warning('failed to parse cookie because UTF-8 decoding failed') + return record_size + + p.skip_to(record_size, 'space at the end of the record') + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + return record_size + + +def parse_safari_cookies(data, jar=None, logger=YDLLogger()): + """ + References: + - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc + - this data appears to be out of date but the important parts of the database structure is the same + - there are a few bytes here and there which are skipped during parsing + """ + if jar is None: + jar = YoutubeDLCookieJar() + page_sizes, body_start = _parse_safari_cookies_header(data, logger) + p = DataParser(data[body_start:], logger) + for page_size in page_sizes: + _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger) + p.skip_to_end('footer') + return jar + + +def _get_linux_keyring_password(browser_keyring_name): + password = keyring.get_password('{} Keys'.format(browser_keyring_name), + '{} Safe Storage'.format(browser_keyring_name)) + if password is None: + # this sometimes occurs in KDE because chrome does not check hasEntry and instead + # just tries to read the value (which kwallet returns "") whereas keyring checks hasEntry + # to verify this: + # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" + # while starting chrome. + # this may be a bug as the intended behaviour is to generate a random password and store + # it, but that doesn't matter here. + password = '' + return password.encode('utf-8') + + +def _get_mac_keyring_password(browser_keyring_name, logger): + if KEYRING_AVAILABLE: + logger.debug('using keyring to obtain password') + password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name) + return password.encode('utf-8') + else: + logger.debug('using find-generic-password to obtain password') + proc = subprocess.Popen(['security', 'find-generic-password', + '-w', # write password to stdout + '-a', browser_keyring_name, # match 'account' + '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + proc.wait() + if proc.returncode == 0: + stdout = proc.stdout.read() + if stdout[-1:] == b'\n': + stdout = stdout[:-1] + return stdout + else: + return None + + +def _get_windows_v10_key(browser_root, logger): + path = _find_most_recently_used_file(browser_root, 'Local State') + if path is None: + logger.error('could not find local state file') + return None + with open(path, 'r') as f: + data = json.load(f) + try: + base64_key = data['os_crypt']['encrypted_key'] + except KeyError: + logger.error('no encrypted key in Local State') + return None + encrypted_key = compat_b64decode(base64_key) + prefix = b'DPAPI' + if not encrypted_key.startswith(prefix): + logger.error('invalid key') + return None + return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger) + + +PBKDF2_AVAILABLE = sys.version_info[:2] >= (3, 4) or CRYPTO_AVAILABLE + + +def pbkdf2_sha1(password, salt, iterations, key_length, logger=YDLLogger()): + try: + from hashlib import pbkdf2_hmac + return pbkdf2_hmac('sha1', password, salt, iterations, key_length) + except ImportError: + try: + from Crypto.Protocol.KDF import PBKDF2 + from Crypto.Hash import SHA1 + return PBKDF2(password, salt, key_length, iterations, hmac_hash_module=SHA1) + except ImportError: + logger.warning('PBKDF2 is not available. You must either upgrade to ' + 'python >= 3.4 or install the pycryptodome package', only_once=True) + return None + + +def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): + plaintext = aes_cbc_decrypt(bytes_to_intlist(ciphertext), + bytes_to_intlist(key), + bytes_to_intlist(initialization_vector)) + padding_length = plaintext[-1] + try: + return intlist_to_bytes(plaintext[:-padding_length]).decode('utf-8') + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?') + return None + + +def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): + cipher = AES.new(key, AES.MODE_GCM, nonce) + try: + plaintext = cipher.decrypt_and_verify(ciphertext, authentication_tag) + except ValueError: + logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?') + return None + + try: + return plaintext.decode('utf-8') + except UnicodeDecodeError: + logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?') + return None + + +def _decrypt_windows_dpapi(ciphertext, logger): + """ + References: + - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata + """ + from ctypes.wintypes import DWORD + + class DATA_BLOB(ctypes.Structure): + _fields_ = [('cbData', DWORD), + ('pbData', ctypes.POINTER(ctypes.c_char))] + + buffer = ctypes.create_string_buffer(ciphertext) + blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer) + blob_out = DATA_BLOB() + ret = ctypes.windll.crypt32.CryptUnprotectData( + ctypes.byref(blob_in), # pDataIn + None, # ppszDataDescr: human readable description of pDataIn + None, # pOptionalEntropy: salt? + None, # pvReserved: must be NULL + None, # pPromptStruct: information about prompts to display + 0, # dwFlags + ctypes.byref(blob_out) # pDataOut + ) + if not ret: + logger.warning('failed to decrypt with DPAPI') + return None + + result = ctypes.string_at(blob_out.pbData, blob_out.cbData) + ctypes.windll.kernel32.LocalFree(blob_out.pbData) + return result + + +def _config_home(): + return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config')) + + +def _open_database_copy(database_path, tmpdir): + # cannot open sqlite databases if they are already in use (e.g. by the browser) + database_copy_path = os.path.join(tmpdir, 'temporary.sqlite') + shutil.copy(database_path, database_copy_path) + conn = sqlite3.connect(database_copy_path) + return conn.cursor() + + +def _get_column_names(cursor, table_name): + table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall() + return [row[1].decode('utf-8') for row in table_info] + + +def _find_most_recently_used_file(root, filename): + # if there are multiple browser profiles, take the most recently used one + paths = [] + for root, dirs, files in os.walk(root): + for file in files: + if file == filename: + paths.append(os.path.join(root, file)) + return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime) + + +def _merge_cookie_jars(jars): + output_jar = YoutubeDLCookieJar() + for jar in jars: + for cookie in jar: + output_jar.set_cookie(cookie) + if jar.filename is not None: + output_jar.filename = jar.filename + return output_jar + + +def _is_path(value): + return os.path.sep in value + + +def _parse_browser_specification(browser_name, profile=None): + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError('unsupported browser: "{}"'.format(browser_name)) + if profile is not None and _is_path(profile): + profile = os.path.expanduser(profile) + return browser_name, profile diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 28fdb086a..09cc5443a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -79,7 +79,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): username, password = self._get_login_info() # No authentication to be performed if username is None: - if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None: + + if (self._LOGIN_REQUIRED + and self._downloader.params.get('cookiefile') is None + and self._downloader.params.get('cookiesfrombrowser') is None): raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) return True diff --git a/youtube_dl/options.py b/youtube_dl/options.py index f6d2b0898..46b3c39a1 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -17,6 +17,7 @@ from .utils import ( preferredencoding, write_string, ) +from .cookies import SUPPORTED_BROWSERS from .version import __version__ @@ -757,6 +758,18 @@ def parseOpts(overrideArguments=None): '--cookies', dest='cookiefile', metavar='FILE', help='File to read cookies from and dump cookie jar in') + filesystem.add_option( + '--cookies-from-browser', + dest='cookiesfrombrowser', metavar='BROWSER[:PROFILE]', + help=( + 'Load cookies from a user profile of the given web browser. ' + 'Currently supported browsers are: {}. ' + 'You can specify the user profile name or directory using ' + '"BROWSER:PROFILE_NAME" or "BROWSER:PROFILE_PATH". ' + 'If no profile is given, the most recently accessed one is used'.format( + '|'.join(sorted(SUPPORTED_BROWSERS)))) + ) + filesystem.add_option( '--cache-dir', dest='cachedir', default=None, metavar='DIR', help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')