Compare commits

..

15 Commits

Author SHA1 Message Date
Ricardo Garcia
5745bfdcdc Bump version number 2010-10-31 11:24:32 +01:00
Ricardo Garcia
320becd692 Remove trails from the "append_const" change (fixes issue #23) 2010-10-31 11:24:32 +01:00
Ricardo Garcia
968aa88438 Only catch UnavailableFormatError in call to process_info 2010-10-31 11:24:32 +01:00
Ricardo Garcia
cbfff4db63 Verify URLs in simulate mode (fixes issue #22) 2010-10-31 11:24:32 +01:00
Ricardo Garcia
781daeabdb Restore "INTERNAL" version number 2010-10-31 11:24:32 +01:00
Ricardo Garcia
705804f5d1 Update version number in LATEST_VERSION 2010-10-31 11:24:24 +01:00
Ricardo Garcia
1d50e3d153 Bump version number 2010-10-31 11:24:19 +01:00
Ricardo Garcia
d69a1c9189 Handle "content too short" errors properly 2010-10-31 11:24:19 +01:00
Ricardo Garcia
488f619471 Close video file before removing it. 2010-10-31 11:24:19 +01:00
Ricardo Garcia
097ba9472b Remove .hgignore from version tracking 2010-10-31 11:24:19 +01:00
Ricardo Garcia
554f3e284c Add LATEST_VERSION to further ease checking which is the latest stable version 2010-10-31 11:24:19 +01:00
Ricardo Garcia
cab60d710a Put back INTERNAL version 2010-10-31 11:24:19 +01:00
Ricardo Garcia
152edc0d4c Set version number 2010-10-31 11:24:16 +01:00
Ricardo Garcia
b74c859d0f Use store_const instead of append_const as the latter requires Python 2.5 2010-10-31 11:24:16 +01:00
Ricardo Garcia
0e54320009 Restore INTERNAL version number 2010-10-31 11:24:16 +01:00
3 changed files with 89 additions and 64 deletions

View File

@@ -1,2 +0,0 @@
syntax: glob
.*.swp

1
LATEST_VERSION Normal file
View File

@@ -0,0 +1 @@
2009.05.25

View File

@@ -58,6 +58,22 @@ class UnavailableFormatError(Exception):
This exception will be thrown when a video is requested
in a format that is not available for that video.
"""
pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.

    Attributes:
        downloaded: number of bytes actually received.
        expected: number of bytes the server announced.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Initialise the base class so ``args`` is populated; without this
        # the exception prints as an empty string and cannot be rebuilt
        # from its args (copy/pickle). The explicit base-class call, rather
        # than super(), also works where Exception is an old-style class.
        Exception.__init__(self, downloaded, expected)
        self.downloaded = downloaded
        self.expected = expected

    def __str__(self):
        # Human-readable form for logs and uncaught tracebacks.
        return 'Content too short: %s/%s bytes' % (self.downloaded, self.expected)
class FileDownloader(object):
"""File Downloader class.
@@ -184,6 +200,14 @@ class FileDownloader(object):
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return long(round(number * multiplier))
@staticmethod
def verify_url(url):
    """Verify a URL is valid and data could be downloaded.

    Opens ``url`` using the module-level ``std_headers`` and reads a
    single byte from the response as a cheap availability probe.

    Returns nothing. Any exception raised while opening or reading the
    URL propagates to the caller.
    """
    request = urllib2.Request(url, None, std_headers)
    data = urllib2.urlopen(request)
    try:
        data.read(1)
    finally:
        # Release the connection even when the probe read fails.
        data.close()
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
self._ies.append(ie)
@@ -249,16 +273,21 @@ class FileDownloader(object):
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
# Forced printings
if self.params.get('forcetitle', False):
print info_dict['title'].encode(locale.getpreferredencoding())
if self.params.get('forceurl', False):
print info_dict['url'].encode(locale.getpreferredencoding())
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
return
try:
self.verify_url(info_dict['url'])
except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
raise UnavailableFormatError
# Forced printings
if self.params.get('forcetitle', False):
print info_dict['title'].encode(locale.getpreferredencoding())
if self.params.get('forceurl', False):
print info_dict['url'].encode(locale.getpreferredencoding())
return
try:
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
@@ -286,11 +315,15 @@ class FileDownloader(object):
self._do_download(outstream, info_dict['url'])
outstream.close()
except (OSError, IOError), err:
outstream.close()
os.remove(filename)
raise UnavailableFormatError
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self.trouble('ERROR: unable to download video data: %s' % str(err))
return
except (ContentTooShortError, ), err:
self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
try:
self.post_process(filename, info_dict)
@@ -364,7 +397,7 @@ class FileDownloader(object):
self.report_finish()
if data_len is not None and str(byte_counter) != data_len:
raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
raise ContentTooShortError(byte_counter, long(data_len))
class InfoExtractor(object):
"""Information Extractor class.
@@ -587,53 +620,53 @@ class YoutubeIE(InfoExtractor):
best_quality = True
while True:
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
# Normalize URL, including format
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
if format_param is not None:
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
request = urllib2.Request(normalized_url, None, std_headers)
try:
# Extension
video_extension = self._video_extensions.get(format_param, 'flv')
self.report_webpage_download(video_id)
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
return
self.report_information_extraction(video_id)
# "t" param
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
return
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
self.report_video_url(video_id, video_real_url)
# Normalize URL, including format
normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
if format_param is not None:
normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
request = urllib2.Request(normalized_url, None, std_headers)
try:
self.report_webpage_download(video_id)
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
return
self.report_information_extraction(video_id)
# "t" param
mobj = re.search(r', "t": "([^"]+)"', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract "t" parameter')
return
video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1))
if format_param is not None:
video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
self.report_video_url(video_id, video_real_url)
# uploader
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = mobj.group(1)
# uploader
mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
video_uploader = mobj.group(1)
# title
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
# title
mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
video_title = mobj.group(1).decode('utf-8')
video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
video_title = video_title.replace(os.sep, u'%')
# simplified title
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
simple_title = simple_title.strip(ur'_')
# simplified title
simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
simple_title = simple_title.strip(ur'_')
try:
# Process video information
self._downloader.process_info({
'id': video_id.decode('utf-8'),
@@ -996,7 +1029,7 @@ if __name__ == '__main__':
# Parse command line
parser = optparse.OptionParser(
usage='Usage: %prog [options] url...',
version='2009.05.11',
version='2009.05.25',
conflict_handler='resolve',
)
@@ -1022,11 +1055,11 @@ if __name__ == '__main__':
video_format.add_option('-f', '--format',
action='append', dest='format', metavar='FMT', help='video format code')
video_format.add_option('-b', '--best-quality',
action='append_const', dest='format', help='download the best quality video possible', const='0')
action='store_const', dest='format', help='download the best quality video possible', const='0')
video_format.add_option('-m', '--mobile-version',
action='append_const', dest='format', help='alias for -f 17', const='17')
action='store_const', dest='format', help='alias for -f 17', const='17')
video_format.add_option('-d', '--high-def',
action='append_const', dest='format', help='alias for -f 22', const='22')
action='store_const', dest='format', help='alias for -f 22', const='22')
parser.add_option_group(video_format)
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
@@ -1084,13 +1117,6 @@ if __name__ == '__main__':
if numeric_limit is None:
parser.error(u'invalid rate limit specified')
opts.ratelimit = numeric_limit
if opts.format is not None and len(opts.format) > 1:
parser.error(u'pass at most one of the video format option flags (-f, -b, -m, -d)')
if opts.format is None:
real_format = None
else:
real_format = opts.format[0]
# Information extractors
youtube_ie = YoutubeIE()
@@ -1107,7 +1133,7 @@ if __name__ == '__main__':
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'simulate': (opts.simulate or opts.geturl or opts.gettitle),
'format': real_format,
'format': opts.format,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')