From 50aa5601ebb085540ba9018182fe2db2dcc167ae Mon Sep 17 00:00:00 2001 From: Matthew Broadway Date: Sat, 12 Jun 2021 17:05:45 +0100 Subject: [PATCH] added Safari support --- test/test_cookies.py | 24 +++++- youtube_dl/cookies.py | 176 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 197 insertions(+), 3 deletions(-) diff --git a/test/test_cookies.py b/test/test_cookies.py index d4db9c669..091326012 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -1,8 +1,9 @@ import unittest +from datetime import datetime, timezone from youtube_dl import cookies from youtube_dl.cookies import LinuxChromeCookieDecryptor, WindowsChromeCookieDecryptor, CRYPTO_AVAILABLE, \ - MacChromeCookieDecryptor, Logger + MacChromeCookieDecryptor, Logger, parse_safari_cookies class MonkeyPatch: @@ -20,7 +21,6 @@ class MonkeyPatch: setattr(self._module, self._name, self._backup_value) -@unittest.skipIf(not CRYPTO_AVAILABLE, 'cryptography library not available') class TestCookies(unittest.TestCase): def test_chrome_cookie_decryptor_linux_derive_key(self): key = LinuxChromeCookieDecryptor.derive_key(b'abc') @@ -44,6 +44,7 @@ class TestCookies(unittest.TestCase): decryptor = LinuxChromeCookieDecryptor('Chrome') assert decryptor.decrypt(encrypted_value) == value + @unittest.skipIf(not CRYPTO_AVAILABLE, 'cryptography library not available') def test_chrome_cookie_decryptor_windows_v10(self): with MonkeyPatch(cookies, '_get_windows_v10_key', lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2\xc3A\x00\x00\x80\xc3\x07:\xc3A' \ + b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \ + b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \ + b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(' + jar = parse_safari_cookies(cookies) + assert len(jar) == 1 + cookie = list(jar)[0] + assert cookie.domain == 'localhost' + assert cookie.port is None + assert cookie.path == '/' + assert cookie.name == 'foo' + assert cookie.value == 'test%20%3Bcookie' + assert not cookie.secure + expected_expiration = datetime(2021, 6, 18, 20, 39, 19, tzinfo=timezone.utc) + assert cookie.expires == expected_expiration.timestamp() diff --git a/youtube_dl/cookies.py b/youtube_dl/cookies.py index 04e30dc7c..5e9ca0563 100644 --- a/youtube_dl/cookies.py +++ b/youtube_dl/cookies.py @@ -4,9 +4,11 @@ import json import os import shutil import sqlite3 +import struct import subprocess import sys import warnings +from datetime import datetime, timedelta from youtube_dl.aes import aes_cbc_decrypt from youtube_dl.compat import compat_cookiejar_Cookie, compat_b64decode, Compat_TemporaryDirectory @@ -25,7 +27,7 @@ except ImportError: KEYRING_AVAILABLE = False -SUPPORTED_BROWSERS = ['brave', 'chrome', 'chromium', 'edge' 'firefox', 'opera', 'vivaldi'] +SUPPORTED_BROWSERS = ['brave', 'chrome', 'chromium', 'edge' 'firefox', 'opera', 'safari', 'vivaldi'] CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge' 'opera', 'vivaldi'} @@ -36,6 +38,9 @@ class Logger: def info(self, message): print(message) + def warning(self, message): + print(message, file=sys.stderr) + def error(self, message): print(message, file=sys.stderr) @@ -59,6 +64,8 @@ def load_cookies(cookie_file, browser_specification, ydl): def extract_cookies_from_browser(browser_name, profile=None, logger=Logger()): if browser_name == 'firefox': return _extract_firefox_cookies(profile, logger) + elif browser_name == 'safari': + return _extract_safari_cookies(profile, logger) elif browser_name in CHROMIUM_BASED_BROWSERS: return _extract_chrome_cookies(browser_name, profile, logger) else: @@ -366,6 +373,170 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8') +def _extract_safari_cookies(profile, logger): + if profile is not None: + logger.error('safari does not support profiles') + if sys.platform != 'darwin': + raise ValueError('unsupported platform: {}'.format(sys.platform)) + + cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') + + if not os.path.isfile(cookies_path): + raise FileNotFoundError('could not find safari cookies database') + + with open(cookies_path, 'rb') as f: + cookies_data = f.read() + + jar = parse_safari_cookies(cookies_data, logger=logger) + logger.info('extracted {} cookies from safari'.format(len(jar))) + return jar + + +class ParserError(Exception): + pass + + +class DataParser: + def __init__(self, data, logger): + self._data = data + self.cursor = 0 + self._logger = logger + + def read_bytes(self, num_bytes): + if num_bytes < 0: + raise ParserError('invalid read of {} bytes'.format(num_bytes)) + end = self.cursor + num_bytes + if end > len(self._data): + raise ParserError('reached end of input') + data = self._data[self.cursor:end] + self.cursor = end + return data + + def expect_bytes(self, expected_value, message): + value = self.read_bytes(len(expected_value)) + if value != expected_value: + raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message)) + + def read_uint(self, big_endian=False): + data_format = '>I' if big_endian else ' 0: + self._logger.debug('skipping {} bytes ({}): {}'.format( + num_bytes, description, self.read_bytes(num_bytes))) + elif num_bytes < 0: + raise ParserError('invalid skip of {} bytes'.format(num_bytes)) + + def skip_to(self, offset, description='unknown'): + self.skip(offset - self.cursor, description) + + def skip_to_end(self, description='unknown'): + self.skip_to(len(self._data), description) + + +def _mac_absolute_time_to_posix(timestamp): + return (datetime(2001, 1, 1, 0, 0) + timedelta(seconds=timestamp)).timestamp() + + +def _parse_safari_cookies_header(data, logger): + p = DataParser(data, logger) + p.expect_bytes(b'cook', 'database signature') + number_of_pages = p.read_uint(big_endian=True) + page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)] + return page_sizes, p.cursor + + +def _parse_safari_cookies_page(data, jar, logger): + p = DataParser(data, logger) + p.expect_bytes(b'\x00\x00\x01\x00', 'page signature') + number_of_cookies = p.read_uint() + record_offsets = [p.read_uint() for _ in range(number_of_cookies)] + if number_of_cookies == 0: + logger.debug('a cookies page of size {} has no cookies'.format(len(data))) + return + + p.skip_to(record_offsets[0], 'unknown page header field') + + for record_offset in record_offsets: + p.skip_to(record_offset, 'space between records') + record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger) + p.read_bytes(record_length) + p.skip_to_end('space in between pages') + + +def _parse_safari_cookies_record(data, jar, logger): + p = DataParser(data, logger) + record_size = p.read_uint() + p.skip(4, 'unknown record field 1') + flags = p.read_uint() + is_secure = bool(flags & 0x0001) + p.skip(4, 'unknown record field 2') + domain_offset = p.read_uint() + name_offset = p.read_uint() + path_offset = p.read_uint() + value_offset = p.read_uint() + p.skip(8, 'unknown record field 3') + expiration_date = _mac_absolute_time_to_posix(p.read_double()) + _creation_date = _mac_absolute_time_to_posix(p.read_double()) + + try: + p.skip_to(domain_offset) + domain = p.read_cstring() + + p.skip_to(name_offset) + name = p.read_cstring() + + p.skip_to(path_offset) + path = p.read_cstring() + + p.skip_to(value_offset) + value = p.read_cstring() + except UnicodeDecodeError: + warnings.warn('failed to parse cookie because UTF-8 decoding failed') + return record_size + + p.skip_to(record_size, 'space at the end of the record') + + cookie = compat_cookiejar_Cookie( + version=0, name=name, value=value, port=None, port_specified=False, + domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'), + path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False, + comment=None, comment_url=None, rest={}) + jar.set_cookie(cookie) + return record_size + + +def parse_safari_cookies(data, jar=None, logger=Logger()): + """ + References: + - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc + - this data appears to be out of date but the important parts of the database structure is the same + - there are a few bytes here and there which are skipped during parsing + """ + if jar is None: + jar = YoutubeDLCookieJar() + page_sizes, body_start = _parse_safari_cookies_header(data, logger) + p = DataParser(data[body_start:], logger) + for page_size in page_sizes: + _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger) + p.skip_to_end('footer') + return jar + + def _get_linux_keyring_password(browser_keyring_name): password = keyring.get_password('{} Keys'.format(browser_keyring_name), '{} Safe Storage'.format(browser_keyring_name)) @@ -526,6 +697,9 @@ class YDLLogger(Logger): def info(self, message): self._ydl.to_screen(message) + def warning(self, message): + self._ydl.to_stderr(message) + def error(self, message): self._ydl.to_stderr(message)