This commit is contained in:
epitron 2013-09-30 14:29:13 -04:00
commit b039d5ebc5
8 changed files with 193 additions and 195 deletions

View File

@ -658,7 +658,7 @@ def _real_main(argv=None):
# Update version # Update version
if opts.update_self: if opts.update_self:
update_self(ydl.to_screen, opts.verbose, sys.argv[0]) update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing # Maybe do nothing
if len(all_urls) < 1: if len(all_urls) < 1:

View File

@ -1,8 +1,10 @@
import re import re
import xml.etree.ElementTree import xml.etree.ElementTree
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urlparse,
determine_ext, determine_ext,
) )
@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor):
u"playlist": [ u"playlist": [
{ {
u"file": u"manofsteel-trailer4.mov", u"file": u"manofsteel-trailer4.mov",
u"md5": u"11874af099d480cc09e103b189805d5f", u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
u"info_dict": { u"info_dict": {
u"duration": 111, u"duration": 111,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
u"title": u"Trailer 4", u"title": u"Trailer 4",
u"upload_date": u"20130523", u"upload_date": u"20130523",
u"uploader_id": u"wb", u"uploader_id": u"wb",
@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor):
}, },
{ {
u"file": u"manofsteel-trailer3.mov", u"file": u"manofsteel-trailer3.mov",
u"md5": u"07a0a262aae5afe68120eed61137ab34", u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
u"info_dict": { u"info_dict": {
u"duration": 182, u"duration": 182,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
u"title": u"Trailer 3", u"title": u"Trailer 3",
u"upload_date": u"20130417", u"upload_date": u"20130417",
u"uploader_id": u"wb", u"uploader_id": u"wb",
@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor):
}, },
{ {
u"file": u"manofsteel-trailer.mov", u"file": u"manofsteel-trailer.mov",
u"md5": u"e401fde0813008e3307e54b6f384cff1", u"md5": u"d0f1e1150989b9924679b441f3404d48",
u"info_dict": { u"info_dict": {
u"duration": 148, u"duration": 148,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
u"title": u"Trailer", u"title": u"Trailer",
u"upload_date": u"20121212", u"upload_date": u"20121212",
u"uploader_id": u"wb", u"uploader_id": u"wb",
@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor):
}, },
{ {
u"file": u"manofsteel-teaser.mov", u"file": u"manofsteel-teaser.mov",
u"md5": u"76b392f2ae9e7c98b22913c10a639c97", u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
u"info_dict": { u"info_dict": {
u"duration": 93, u"duration": 93,
u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
u"title": u"Teaser", u"title": u"Teaser",
u"upload_date": u"20120721", u"upload_date": u"20120721",
u"uploader_id": u"wb", u"uploader_id": u"wb",
@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor):
] ]
} }
_JSON_RE = r'iTunes.playURL\((.*?)\);'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
movie = mobj.group('movie') movie = mobj.group('movie')
uploader_id = mobj.group('company') uploader_id = mobj.group('company')
playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc' playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_snippet = self._download_webpage(playlist_url, movie) playlist_snippet = self._download_webpage(playlist_url, movie)
playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet) playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
# The ' in the onClick attributes are not escaped, it couldn't be parsed
# with xml.etree.ElementTree.fromstring
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
playlist_html = u'<html>' + playlist_cleaned + u'</html>' playlist_html = u'<html>' + playlist_cleaned + u'</html>'
size_cache = {}
doc = xml.etree.ElementTree.fromstring(playlist_html) doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = [] playlist = []
for li in doc.findall('./div/ul/li'): for li in doc.findall('./div/ul/li'):
title = li.find('.//h3').text on_click = li.find('.//a').attrib['onClick']
trailer_info_json = self._search_regex(self._JSON_RE,
on_click, u'trailer info')
trailer_info = json.loads(trailer_info_json)
title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src'] thumbnail = li.find('.//img').attrib['src']
upload_date = trailer_info['posted'].replace('-', '')
date_el = li.find('.//p') runtime = trailer_info['runtime']
upload_date = None m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
if m:
upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
runtime_el = date_el.find('./br')
m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
duration = None duration = None
if m: if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
first_url = trailer_info['url']
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
settings = json.loads(settings_json)
formats = [] formats = []
for formats_el in li.findall('.//a'): for format in settings['metadata']['sizes']:
if formats_el.attrib['class'] != 'OverlayPanel': # The src is a file pointing to the real video file
continue format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
target = formats_el.attrib['target']
format_code = formats_el.text
if 'Automatic' in format_code:
continue
size_q = formats_el.attrib['href']
size_id = size_q.rpartition('#videos-')[2]
if size_id not in size_cache:
size_url = url + size_q
sizepage_html = self._download_webpage(
size_url, movie,
note=u'Downloading size info %s' % size_id,
errnote=u'Error while downloading size info %s' % size_id,
)
_doc = xml.etree.ElementTree.fromstring(sizepage_html)
size_cache[size_id] = _doc
sizepage_doc = size_cache[size_id]
links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
for vid_a in links:
href = vid_a.get('href')
if not href.endswith(target):
continue
detail_q = href.partition('#')[0]
detail_url = url + '/' + detail_q
m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
detail_id = m.group('detail_id')
detail_html = self._download_webpage(
detail_url, movie,
note=u'Downloading detail %s %s' % (detail_id, size_id),
errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
)
detail_doc = xml.etree.ElementTree.fromstring(detail_html)
movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
assert movie_link_el.get('class') == 'movieLink'
movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
ext = determine_ext(movie_link)
assert ext == 'mov'
formats.append({ formats.append({
'format': format_code, 'url': format_url,
'ext': ext, 'ext': determine_ext(format_url),
'url': movie_link, 'format': format['type'],
'width': format['width'],
'height': int(format['height']),
}) })
formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = { info = {
'_type': 'video', '_type': 'video',

View File

@ -14,8 +14,15 @@ from ..utils import (
ExtractorError, ExtractorError,
) )
class DailymotionBaseInfoExtractor(InfoExtractor):
@staticmethod
def _build_request(url):
"""Build a request with the family filter disabled"""
request = compat_urllib_request.Request(url)
request.add_header('Cookie', 'family_filter=off')
return request
class DailymotionIE(SubtitlesInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion""" """Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)' _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
@ -40,8 +47,7 @@ class DailymotionIE(SubtitlesInfoExtractor):
url = 'http://www.dailymotion.com/video/%s' % video_id url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url) request = self._build_request(url)
request.add_header('Cookie', 'family_filter=off')
webpage = self._download_webpage(request, video_id) webpage = self._download_webpage(request, video_id)
# Extract URL, uploader and title from webpage # Extract URL, uploader and title from webpage
@ -113,7 +119,7 @@ class DailymotionIE(SubtitlesInfoExtractor):
return {} return {}
class DailymotionPlaylistIE(InfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist' IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
_MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>' _MORE_PAGES_INDICATOR = r'<div class="next">.*?<a.*?href="/playlist/.+?".*?>.*?</a>.*?</div>'
@ -122,7 +128,8 @@ class DailymotionPlaylistIE(InfoExtractor):
def _extract_entries(self, id): def _extract_entries(self, id):
video_ids = [] video_ids = []
for pagenum in itertools.count(1): for pagenum in itertools.count(1):
webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum), request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum) id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage) playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)

View File

@ -63,13 +63,13 @@ class RTLnowIE(InfoExtractor):
}, },
}, },
{ {
u'url': u'http://www.rtlnitronow.de/recht-ordnung/fahrradpolizei-koeln-fischereiaufsicht-ruegen.php?film_id=124311&player=1&season=1', u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
u'file': u'124311.flv', u'file': u'127367.flv',
u'info_dict': { u'info_dict': {
u'upload_date': u'20130830', u'upload_date': u'20130926',
u'title': u'Recht & Ordnung - Fahrradpolizei Köln & Fischereiaufsicht Rügen', u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
u'description': u'Fahrradpolizei Köln & Fischereiaufsicht Rügen', u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
u'thumbnail': u'http://autoimg.static-fra.de/nitronow/338273/1500x1500/image2.jpg' u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
}, },
u'params': { u'params': {
u'skip_download': True, u'skip_download': True,

View File

@ -1,4 +1,3 @@
import datetime
import itertools import itertools
import json import json
import re import re
@ -6,86 +5,85 @@ import re
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..utils import ( from ..utils import (
compat_urllib_parse, compat_urllib_parse,
compat_urlparse,
ExtractorError, determine_ext,
clean_html,
) )
class YahooIE(InfoExtractor): class YahooIE(InfoExtractor):
IE_DESC = u'Yahoo screen' IE_DESC = u'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
_TEST = { _TESTS = [
{
u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
u'file': u'214727115.flv', u'file': u'214727115.mp4',
u'md5': u'2e717f169c1be93d84d3794a00d4a325',
u'info_dict': { u'info_dict': {
u"title": u"Julian Smith & Travis Legg Watch Julian Smith" u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
u'description': u'Julian and Travis watch Julian Smith',
}, },
u'skip': u'Requires rtmpdump' },
} {
u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
u'file': u'103000935.flv',
u'info_dict': {
u'title': u'The Cougar Lies with Spanish Moss',
u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
},
u'params': {
# Requires rtmpdump
u'skip_download': True,
},
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage)
if m_id is None: items_json = self._search_regex(r'YVIDEO_INIT_ITEMS = ({.*?});$',
# TODO: Check which url parameters are required webpage, u'items', flags=re.MULTILINE)
info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id items = json.loads(items_json)
webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage') info = items['mediaItems']['query']['results']['mediaObj'][0]
info_re = r'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.* meta = info['meta']
<description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
<media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
<media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
'''
self.report_extraction(video_id)
m_info = re.search(info_re, webpage, re.VERBOSE|re.DOTALL)
if m_info is None:
raise ExtractorError(u'Unable to extract video info')
video_title = m_info.group('title')
video_description = m_info.group('description')
video_thumb = m_info.group('thumb')
video_date = m_info.group('date')
video_date = datetime.datetime.strptime(video_date, '%m/%d/%Y').strftime('%Y%m%d')
# TODO: Find a way to get mp4 videos formats = []
rest_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id for s in info['streams']:
webpage = self._download_webpage(rest_url, video_id, u'Downloading video url webpage') format_info = {
m_rest = re.search(r'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage) 'width': s.get('width'),
video_url = m_rest.group('url') 'height': s.get('height'),
video_path = m_rest.group('path') 'bitrate': s.get('bitrate'),
if m_rest is None:
raise ExtractorError(u'Unable to extract video url')
else: # We have to use a different method if another id is defined
long_id = m_id.group('new_id')
info_url = 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id + '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
webpage = self._download_webpage(info_url, video_id, u'Downloading info json')
json_str = re.search(r'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage).group(1)
info = json.loads(json_str)
res = info[u'query'][u'results'][u'mediaObj'][0]
stream = res[u'streams'][0]
video_path = stream[u'path']
video_url = stream[u'host']
meta = res[u'meta']
video_title = meta[u'title']
video_description = meta[u'description']
video_thumb = meta[u'thumbnail']
video_date = None # I can't find it
info_dict = {
'id': video_id,
'url': video_url,
'play_path': video_path,
'title':video_title,
'description': video_description,
'thumbnail': video_thumb,
'upload_date': video_date,
'ext': 'flv',
} }
return info_dict
host = s['host']
path = s['path']
if host.startswith('rtmp'):
format_info.update({
'url': host,
'play_path': path,
'ext': 'flv',
})
else:
format_url = compat_urlparse.urljoin(host, path)
format_info['url'] = format_url
format_info['ext'] = determine_ext(format_url)
formats.append(format_info)
formats = sorted(formats, key=lambda f:(f['height'], f['width']))
info = {
'id': video_id,
'title': meta['title'],
'formats': formats,
'description': clean_html(meta['description']),
'thumbnail': meta['thumbnail'],
}
# TODO: Remove when #980 has been merged
info.update(formats[-1])
return info
class YahooSearchIE(SearchInfoExtractor): class YahooSearchIE(SearchInfoExtractor):
IE_DESC = u'Yahoo screen search' IE_DESC = u'Yahoo screen search'

View File

@ -2,16 +2,14 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
unescapeHTML,
) )
class ZDFIE(InfoExtractor): class ZDFIE(InfoExtractor):
_VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?' _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P<hash>#)?\/(.*beitrag\/video\/)(?P<video_id>[^/\?]+)(?:\?.*)?'
_TITLE = r'<h1(?: class="beitragHeadline")?>(?P<title>.*)</h1>'
_MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>' _MEDIA_STREAM = r'<a href="(?P<video_url>.+(?P<media_type>.streaming).+/zdf/(?P<quality>[^\/]+)/[^"]*)".+class="play".+>'
_MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
_RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -19,6 +17,9 @@ class ZDFIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('video_id') video_id = mobj.group('video_id')
if mobj.group('hash'):
url = url.replace(u'#', u'', 1)
html = self._download_webpage(url, video_id) html = self._download_webpage(url, video_id)
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
if streams is None: if streams is None:
@ -27,39 +28,48 @@ class ZDFIE(InfoExtractor):
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url # s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url # s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
# choose first/default media type and highest quality for now # choose first/default media type and highest quality for now
for s in streams: #find 300 - dsl1000mbit def stream_pref(s):
if s['quality'] == '300' and s['media_type'] == 'wstreaming': TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
stream_=s try:
break type_pref = TYPE_ORDER.index(s['media_type'])
for s in streams: #find veryhigh - dsl2000mbit except ValueError:
if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working type_pref = 999
stream_=s
break QUALITY_ORDER = ['veryhigh', '300']
if stream_ is None: try:
quality_pref = QUALITY_ORDER.index(s['quality'])
except ValueError:
quality_pref = 999
return (type_pref, quality_pref)
sorted_streams = sorted(streams, key=stream_pref)
if not sorted_streams:
raise ExtractorError(u'No stream found.') raise ExtractorError(u'No stream found.')
stream = sorted_streams[0]
media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL') media_link = self._download_webpage(
stream['video_url'],
video_id,
u'Get stream URL')
self.report_extraction(video_id) MMS_STREAM = r'href="(?P<video_url>mms://[^"]*)"'
mobj = re.search(self._TITLE, html) RTSP_STREAM = r'(?P<video_url>rtsp://[^"]*.mp4)'
mobj = re.search(self._MEDIA_STREAM, media_link)
if mobj is None: if mobj is None:
raise ExtractorError(u'Cannot extract title') mobj = re.search(RTSP_STREAM, media_link)
title = unescapeHTML(mobj.group('title'))
mobj = re.search(self._MMS_STREAM, media_link)
if mobj is None:
mobj = re.search(self._RTSP_STREAM, media_link)
if mobj is None: if mobj is None:
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL') raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
mms_url = mobj.group('video_url') video_url = mobj.group('video_url')
mobj = re.search('(.*)[.](?P<ext>[^.]+)', mms_url) title = self._html_search_regex(
if mobj is None: r'<h1(?: class="beitragHeadline")?>(.*?)</h1>',
raise ExtractorError(u'Cannot extract extention') html, u'title')
ext = mobj.group('ext')
return [{'id': video_id, return {
'url': mms_url, 'id': video_id,
'url': video_url,
'title': title, 'title': title,
'ext': ext 'ext': determine_ext(video_url)
}] }

View File

@ -1,6 +1,9 @@
import io
import json import json
import traceback import traceback
import hashlib import hashlib
import subprocess
import sys
from zipimport import zipimporter from zipimport import zipimporter
from .utils import * from .utils import *
@ -34,7 +37,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False if signature != sha256(message).digest(): return False
return True return True
def update_self(to_screen, verbose, filename): def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository""" """Update the program file with the latest version from the repository"""
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/" UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
@ -42,7 +45,6 @@ def update_self(to_screen, verbose, filename):
JSON_URL = UPDATE_URL + 'versions.json' JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537) UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"): if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.') to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
return return
@ -75,11 +77,18 @@ def update_self(to_screen, verbose, filename):
to_screen(u'ERROR: the versions file signature is invalid. Aborting.') to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
return return
to_screen(u'Updating to version ' + versions_info['latest'] + '...') version_id = versions_info['latest']
version = versions_info['versions'][versions_info['latest']] to_screen(u'Updating to version ' + version_id + '...')
version = versions_info['versions'][version_id]
print_notes(to_screen, versions_info['versions']) print_notes(to_screen, versions_info['versions'])
filename = sys.argv[0]
# Py2EXE: Filename could be different
if hasattr(sys, "frozen") and not os.path.isfile(filename):
if os.path.isfile(filename + u'.exe'):
filename += u'.exe'
if not os.access(filename, os.W_OK): if not os.access(filename, os.W_OK):
to_screen(u'ERROR: no write permissions on %s' % filename) to_screen(u'ERROR: no write permissions on %s' % filename)
return return
@ -116,16 +125,18 @@ def update_self(to_screen, verbose, filename):
try: try:
bat = os.path.join(directory, 'youtube-dl-updater.bat') bat = os.path.join(directory, 'youtube-dl-updater.bat')
b = open(bat, 'w') with io.open(bat, 'w') as batfile:
b.write(""" batfile.write(u"""
echo Updating youtube-dl... @echo off
echo Waiting for file handle to be closed ...
ping 127.0.0.1 -n 5 -w 1000 > NUL ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s" move /Y "%s.new" "%s" > NUL
del "%s" echo Updated youtube-dl to version %s.
\n""" %(exe, exe, bat)) start /b "" cmd /c del "%%~f0"&exit /b"
b.close() \n""" % (exe, exe, version_id))
os.startfile(bat) subprocess.Popen([bat]) # Continues to run in the background
return # Do not show premature success messages
except (IOError, OSError) as err: except (IOError, OSError) as err:
if verbose: to_screen(compat_str(traceback.format_exc())) if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version') to_screen(u'ERROR: unable to overwrite current version')

View File

@ -1,2 +1,2 @@
__version__ = '2013.09.24.2' __version__ = '2013.09.29'