[techtalks] Modernize

This commit is contained in:
Sergey M․ 2014-09-04 19:48:29 +07:00
parent feec0f56f5
commit c1a3c9ddb2

View File

@@ -1,3 +1,5 @@
from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -11,24 +13,30 @@ class TechTalksIE(InfoExtractor):
_VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/' _VALID_URL = r'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
_TEST = { _TEST = {
u'url': u'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/', 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
u'playlist': [ 'info_dict': {
'id': '57758',
'title': 'Learning Topic Models --- Going beyond SVD',
},
'playlist': [
{ {
u'file': u'57758.flv', 'info_dict': {
u'info_dict': { 'id': '57758',
u'title': u'Learning Topic Models --- Going beyond SVD', 'ext': 'flv',
'title': 'Learning Topic Models --- Going beyond SVD',
}, },
}, },
{ {
u'file': u'57758-slides.flv', 'info_dict': {
u'info_dict': { 'id': '57758-slides',
u'title': u'Learning Topic Models --- Going beyond SVD', 'ext': 'flv',
'title': 'Learning Topic Models --- Going beyond SVD',
}, },
}, },
], ],
u'params': { 'params': {
# rtmp download # rtmp download
u'skip_download': True, 'skip_download': True,
}, },
} }
@@ -36,30 +44,36 @@ class TechTalksIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
talk_id = mobj.group('id') talk_id = mobj.group('id')
webpage = self._download_webpage(url, talk_id) webpage = self._download_webpage(url, talk_id)
rtmp_url = self._search_regex(r'netConnectionUrl: \'(.*?)\'', webpage, rtmp_url = self._search_regex(
u'rtmp url') r'netConnectionUrl: \'(.*?)\'', webpage, 'rtmp url')
play_path = self._search_regex(r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"', play_path = self._search_regex(
webpage, u'presenter play path') r'href=\'(.*?)\' [^>]*id="flowplayer_presenter"',
webpage, 'presenter play path')
title = clean_html(get_element_by_attribute('class', 'title', webpage)) title = clean_html(get_element_by_attribute('class', 'title', webpage))
video_info = { video_info = {
'id': talk_id, 'id': talk_id,
'title': title, 'title': title,
'url': rtmp_url, 'url': rtmp_url,
'play_path': play_path, 'play_path': play_path,
'ext': 'flv', 'ext': 'flv',
} }
m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage) m_slides = re.search(r'<a class="slides" href=\'(.*?)\'', webpage)
if m_slides is None: if m_slides is None:
return video_info return video_info
else: else:
return [ return {
video_info, '_type': 'playlist',
# The slides video 'id': talk_id,
{ 'title': title,
'id': talk_id + '-slides', 'entries': [
'title': title, video_info,
'url': rtmp_url, # The slides video
'play_path': m_slides.group(1), {
'ext': 'flv', 'id': talk_id + '-slides',
}, 'title': title,
] 'url': rtmp_url,
'play_path': m_slides.group(1),
'ext': 'flv',
},
],
}