Prepare signature function caching

This commit is contained in:
Philipp Hagemeister 2013-09-21 15:19:48 +02:00
parent 95dbd2f990
commit 8379969834

View File

@@ -400,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs) super(YoutubeIE, self).__init__(*args, **kwargs)
self._jsplayer_cache = {} self._player_cache = {}
def report_video_webpage_download(self, video_id): def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage.""" """Report attempt to download video webpage."""
@@ -423,26 +423,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'RTMP download detected') self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url): def _extract_signature_function(self, video_id, player_url):
id_m = re.match(r'.*-(?P<id>[^.]+)\.(?P<ext>[^.]+)$', player_url) id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$',
player_url)
player_type = id_m.group('ext') player_type = id_m.group('ext')
player_id = id_m.group('id') player_id = id_m.group('id')
# TODO read from filesystem cache
if player_type == 'js': if player_type == 'js':
code = self._download_webpage( code = self._download_webpage(
player_url, video_id, player_url, video_id,
note=u'Downloading %s player %s' % (player_type, jsplayer_id), note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url) errnote=u'Download of %s failed' % player_url)
return self._parse_sig_js(code) res = self._parse_sig_js(code)
elif player_tpye == 'swf': elif player_tpye == 'swf':
urlh = self._request_webpage( urlh = self._request_webpage(
player_url, video_id, player_url, video_id,
note=u'Downloading %s player %s' % (player_type, jsplayer_id), note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url) errnote=u'Download of %s failed' % player_url)
code = urlh.read() code = urlh.read()
return self._parse_sig_swf(code) res = self._parse_sig_swf(code)
else: else:
assert False, 'Invalid player type %r' % player_type assert False, 'Invalid player type %r' % player_type
# TODO write cache
return res
def _parse_sig_js(self, jscode): def _parse_sig_js(self, jscode):
funcname = self._search_regex( funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode, r'signature=([a-zA-Z]+)', jscode,
@@ -987,22 +994,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
initial_function = extract_function(u'decipher') initial_function = extract_function(u'decipher')
return lambda s: initial_function([s]) return lambda s: initial_function([s])
def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False): def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature""" """Turn the encrypted s field into a working signature"""
if jsplayer_url is not None: if player_url is not None:
try: try:
if jsplayer_url not in self._jsplayer_cache: if player_url not in self._player_cache:
self._jsplayer_cache[jsplayer_url] = self._extract_signature_function( func = self._extract_signature_function(
video_id, jsplayer_url video_id, player_url
) )
return self._jsplayer_cache[jsplayer_url]([s]) self._player_cache[player_url] = func
return self._player_cache[player_url](s)
except Exception as e: except Exception as e:
tb = traceback.format_exc() tb = traceback.format_exc()
self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb) self._downloader.report_warning(
u'Automatic signature extraction failed: ' + tb)
self._downloader.report_warning(u'Warning: Falling back to static signature algorithm') self._downloader.report_warning(
u'Warning: Falling back to static signature algorithm')
return self._static_decrypt_signature(s)
def _static_decrypt_signature(self, s):
if age_gate: if age_gate:
# The videos with age protection use another player, so the # The videos with age protection use another player, so the
# algorithms can be different. # algorithms can be different.
@@ -1376,12 +1388,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
encrypted_sig = url_data['s'][0] encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'): if self._downloader.params.get('verbose'):
if age_gate: if age_gate:
player_version = self._search_regex(r'-(.+)\.swf$', player_version = self._search_regex(
player_url if player_url else 'NOT FOUND', r'-(.+)\.swf$',
player_url if player_url else None,
'flash player', fatal=False) 'flash player', fatal=False)
player_desc = 'flash player %s' % player_version player_desc = 'flash player %s' % player_version
else: else:
player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage, player_version = self._search_regex(
r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False) 'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version player_desc = u'html5 player %s' % player_version
@@ -1389,15 +1403,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
if age_gate: if not age_gate:
jsplayer_url = None
else:
jsplayer_url_json = self._search_regex( jsplayer_url_json = self._search_regex(
r'"assets":.+?"js":\s*("[^"]+")', r'"assets":.+?"js":\s*("[^"]+")',
video_webpage, u'JS player URL') video_webpage, u'JS player URL')
jsplayer_url = json.loads(jsplayer_url_json) player_url = json.loads(jsplayer_url_json)
signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate) signature = self._decrypt_signature(
encrypted_sig, video_id, player_url, age_gate)
url += '&signature=' + signature url += '&signature=' + signature
if 'ratebypass' not in url: if 'ratebypass' not in url:
url += '&ratebypass=yes' url += '&ratebypass=yes'