mirror of
				https://github.com/ytdl-org/youtube-dl
				synced 2025-10-22 16:18:37 +09:00 
			
		
		
		
	Compare commits
	
		
			43 Commits
		
	
	
		
			2009.05.25
			...
			2010.01.05
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | fe788f2c6f | ||
|   | 75a4cf3c97 | ||
|   | 0487b407a1 | ||
|   | a692ca7c49 | ||
|   | 9c457d2a20 | ||
|   | c39c05cdd7 | ||
|   | 29f0756805 | ||
|   | d9bc015b3c | ||
|   | 4bec29ef4b | ||
|   | ab1f697827 | ||
|   | 583c714fde | ||
|   | 850ab76560 | ||
|   | f5a5bec351 | ||
|   | f94b636c3e | ||
|   | 0833f1eb83 | ||
|   | ad0525b3e6 | ||
|   | 30edbf89e4 | ||
|   | eae2666cb4 | ||
|   | 2a04438c7c | ||
|   | dd24ff44ab | ||
|   | 304a4d85ea | ||
|   | d899774377 | ||
|   | fade05990c | ||
|   | e5b1604882 | ||
|   | 0c8beb43f2 | ||
|   | 71b7300e63 | ||
|   | 8497c36d5a | ||
|   | 110cd3462e | ||
|   | 18963a36b0 | ||
|   | df1ceb1fd9 | ||
|   | 7eb0e89742 | ||
|   | 8b07dec5f6 | ||
|   | 113e5266cc | ||
|   | 55e7c75e12 | ||
|   | ff21a710ae | ||
|   | 7374795552 | ||
|   | 0cd61126fc | ||
|   | e1f18b8a84 | ||
|   | 6a0015a7e0 | ||
|   | 7db85b2c70 | ||
|   | f76c2df64e | ||
|   | daa88ccc2e | ||
|   | eb5d184157 | 
| @@ -1 +1 @@ | ||||
| 2009.05.25 | ||||
| 2010.01.05 | ||||
|   | ||||
							
								
								
									
										359
									
								
								youtube-dl
									
									
									
									
									
								
							
							
						
						
									
										359
									
								
								youtube-dl
									
									
									
									
									
								
							| @@ -13,13 +13,15 @@ import os.path | ||||
| import re | ||||
| import socket | ||||
| import string | ||||
| import subprocess | ||||
| import sys | ||||
| import time | ||||
| import urllib | ||||
| import urllib2 | ||||
| import urlparse | ||||
|  | ||||
| std_headers = { | ||||
| 	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8', | ||||
| 	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2', | ||||
| 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', | ||||
| 	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', | ||||
| 	'Accept-Language': 'en-us,en;q=0.5', | ||||
| @@ -27,6 +29,22 @@ std_headers = { | ||||
|  | ||||
| simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') | ||||
|  | ||||
| def preferredencoding(): | ||||
| 	"""Get preferred encoding. | ||||
|  | ||||
| 	Returns the best encoding scheme for the system, based on | ||||
| 	locale.getpreferredencoding() and some further tweaks. | ||||
| 	""" | ||||
| 	def yield_preferredencoding(): | ||||
| 		try: | ||||
| 			pref = locale.getpreferredencoding() | ||||
| 			u'TEST'.encode(pref) | ||||
| 		except: | ||||
| 			pref = 'UTF-8' | ||||
| 		while True: | ||||
| 			yield pref | ||||
| 	return yield_preferredencoding().next() | ||||
|  | ||||
| class DownloadError(Exception): | ||||
| 	"""Download Error exception. | ||||
| 	 | ||||
| @@ -114,6 +132,7 @@ class FileDownloader(object): | ||||
| 	ignoreerrors:	Do not stop on download errors. | ||||
| 	ratelimit:	Download speed limit, in bytes/sec. | ||||
| 	nooverwrites:	Prevent overwriting files. | ||||
| 	continuedl:	Try to continue downloads if possible. | ||||
| 	""" | ||||
|  | ||||
| 	params = None | ||||
| @@ -142,10 +161,12 @@ class FileDownloader(object): | ||||
| 	def format_bytes(bytes): | ||||
| 		if bytes is None: | ||||
| 			return 'N/A' | ||||
| 		if bytes == 0: | ||||
| 		if type(bytes) is str: | ||||
| 			bytes = float(bytes) | ||||
| 		if bytes == 0.0: | ||||
| 			exponent = 0 | ||||
| 		else: | ||||
| 			exponent = long(math.log(float(bytes), 1024.0)) | ||||
| 			exponent = long(math.log(bytes, 1024.0)) | ||||
| 		suffix = 'bkMGTPEZY'[exponent] | ||||
| 		converted = float(bytes) / float(1024**exponent) | ||||
| 		return '%.2f%s' % (converted, suffix) | ||||
| @@ -182,13 +203,13 @@ class FileDownloader(object): | ||||
| 		new_min = max(bytes / 2.0, 1.0) | ||||
| 		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB | ||||
| 		if elapsed_time < 0.001: | ||||
| 			return int(new_max) | ||||
| 			return long(new_max) | ||||
| 		rate = bytes / elapsed_time | ||||
| 		if rate > new_max: | ||||
| 			return int(new_max) | ||||
| 			return long(new_max) | ||||
| 		if rate < new_min: | ||||
| 			return int(new_min) | ||||
| 		return int(rate) | ||||
| 			return long(new_min) | ||||
| 		return long(rate) | ||||
|  | ||||
| 	@staticmethod | ||||
| 	def parse_bytes(bytestr): | ||||
| @@ -202,11 +223,13 @@ class FileDownloader(object): | ||||
|  | ||||
| 	@staticmethod | ||||
| 	def verify_url(url): | ||||
| 		"""Verify a URL is valid and data could be downloaded.""" | ||||
| 		"""Verify a URL is valid and data could be downloaded. Return real data URL.""" | ||||
| 		request = urllib2.Request(url, None, std_headers) | ||||
| 		data = urllib2.urlopen(request) | ||||
| 		data.read(1) | ||||
| 		url = data.geturl() | ||||
| 		data.close() | ||||
| 		return url | ||||
|  | ||||
| 	def add_info_extractor(self, ie): | ||||
| 		"""Add an InfoExtractor object to the end of the list.""" | ||||
| @@ -221,12 +244,12 @@ class FileDownloader(object): | ||||
| 	def to_stdout(self, message, skip_eol=False): | ||||
| 		"""Print message to stdout if not in quiet mode.""" | ||||
| 		if not self.params.get('quiet', False): | ||||
| 			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()), | ||||
| 			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()), | ||||
| 			sys.stdout.flush() | ||||
| 	 | ||||
| 	def to_stderr(self, message): | ||||
| 		"""Print message to stderr.""" | ||||
| 		print >>sys.stderr, message | ||||
| 		print >>sys.stderr, message.encode(preferredencoding()) | ||||
| 	 | ||||
| 	def fixed_template(self): | ||||
| 		"""Checks if the output template is fixed.""" | ||||
| @@ -267,6 +290,18 @@ class FileDownloader(object): | ||||
| 		self.to_stdout(u'\r[download] %s of %s at %s ETA %s' % | ||||
| 				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True) | ||||
|  | ||||
| 	def report_resuming_byte(self, resume_len): | ||||
| 		"""Report attemtp to resume at given byte.""" | ||||
| 		self.to_stdout(u'[download] Resuming download at byte %s' % resume_len) | ||||
| 	 | ||||
| 	def report_file_already_downloaded(self, file_name): | ||||
| 		"""Report file has already been fully downloaded.""" | ||||
| 		self.to_stdout(u'[download] %s has already been downloaded' % file_name) | ||||
| 	 | ||||
| 	def report_unable_to_resume(self): | ||||
| 		"""Report it was impossible to resume download.""" | ||||
| 		self.to_stdout(u'[download] Unable to resume') | ||||
| 	 | ||||
| 	def report_finish(self): | ||||
| 		"""Report download finished.""" | ||||
| 		self.to_stdout(u'') | ||||
| @@ -276,15 +311,15 @@ class FileDownloader(object): | ||||
| 		# Do nothing else if in simulate mode | ||||
| 		if self.params.get('simulate', False): | ||||
| 			try: | ||||
| 				self.verify_url(info_dict['url']) | ||||
| 				info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') | ||||
| 			except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: | ||||
| 				raise UnavailableFormatError | ||||
|  | ||||
| 			# Forced printings | ||||
| 			if self.params.get('forcetitle', False): | ||||
| 				print info_dict['title'].encode(locale.getpreferredencoding()) | ||||
| 				print info_dict['title'].encode(preferredencoding()) | ||||
| 			if self.params.get('forceurl', False): | ||||
| 				print info_dict['url'].encode(locale.getpreferredencoding()) | ||||
| 				print info_dict['url'].encode(preferredencoding()) | ||||
|  | ||||
| 			return | ||||
| 			 | ||||
| @@ -292,11 +327,10 @@ class FileDownloader(object): | ||||
| 			template_dict = dict(info_dict) | ||||
| 			template_dict['epoch'] = unicode(long(time.time())) | ||||
| 			filename = self.params['outtmpl'] % template_dict | ||||
| 			self.report_destination(filename) | ||||
| 		except (ValueError, KeyError), err: | ||||
| 			self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) | ||||
| 		if self.params['nooverwrites'] and os.path.exists(filename): | ||||
| 			self.to_stderr('WARNING: file exists: %s; skipping' % filename) | ||||
| 		if self.params.get('nooverwrites', False) and os.path.exists(filename): | ||||
| 			self.to_stderr(u'WARNING: file exists: %s; skipping' % filename) | ||||
| 			return | ||||
|  | ||||
| 		try: | ||||
| @@ -306,17 +340,8 @@ class FileDownloader(object): | ||||
| 			return | ||||
|  | ||||
| 		try: | ||||
| 			outstream = open(filename, 'wb') | ||||
| 			success = self._do_download(filename, info_dict['url'].encode('utf-8')) | ||||
| 		except (OSError, IOError), err: | ||||
| 			self.trouble('ERROR: unable to open for writing: %s' % str(err)) | ||||
| 			return | ||||
|  | ||||
| 		try: | ||||
| 			self._do_download(outstream, info_dict['url']) | ||||
| 			outstream.close() | ||||
| 		except (OSError, IOError), err: | ||||
| 			outstream.close() | ||||
| 			os.remove(filename) | ||||
| 			raise UnavailableFormatError | ||||
| 		except (urllib2.URLError, httplib.HTTPException, socket.error), err: | ||||
| 			self.trouble('ERROR: unable to download video data: %s' % str(err)) | ||||
| @@ -325,6 +350,7 @@ class FileDownloader(object): | ||||
| 			self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) | ||||
| 			return | ||||
|  | ||||
| 		if success: | ||||
| 			try: | ||||
| 				self.post_process(filename, info_dict) | ||||
| 			except (PostProcessingError), err: | ||||
| @@ -366,21 +392,78 @@ class FileDownloader(object): | ||||
| 			if info is None: | ||||
| 				break | ||||
| 	 | ||||
| 	def _do_download(self, stream, url): | ||||
| 	def _download_with_rtmpdump(self, filename, url): | ||||
| 		self.report_destination(filename) | ||||
|  | ||||
| 		# Check for rtmpdump first | ||||
| 		try: | ||||
| 			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT) | ||||
| 		except (OSError, IOError): | ||||
| 			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run') | ||||
| 			return False | ||||
|  | ||||
| 		# Download using rtmpdump. rtmpdump returns exit code 2 when | ||||
| 		# the connection was interrupted and resuming appears to be | ||||
| 		# possible. This is part of rtmpdump's normal usage, AFAIK. | ||||
| 		retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)]) | ||||
| 		while retval == 2: | ||||
| 			self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True) | ||||
| 			time.sleep(2.0) # This seems to be needed | ||||
| 			retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename]) | ||||
| 		if retval == 0: | ||||
| 			self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename)) | ||||
| 			return True | ||||
| 		else: | ||||
| 			self.trouble('ERROR: rtmpdump exited with code %d' % retval) | ||||
| 			return False | ||||
|  | ||||
| 	def _do_download(self, filename, url): | ||||
| 		# Attempt to download using rtmpdump | ||||
| 		if url.startswith('rtmp'): | ||||
| 			return self._download_with_rtmpdump(filename, url) | ||||
|  | ||||
| 		stream = None | ||||
| 		open_mode = 'wb' | ||||
| 		basic_request = urllib2.Request(url, None, std_headers) | ||||
| 		request = urllib2.Request(url, None, std_headers) | ||||
|  | ||||
| 		# Establish possible resume length | ||||
| 		if os.path.isfile(filename): | ||||
| 			resume_len = os.path.getsize(filename) | ||||
| 		else: | ||||
| 			resume_len = 0 | ||||
|  | ||||
| 		# Request parameters in case of being able to resume | ||||
| 		if self.params.get('continuedl', False) and resume_len != 0: | ||||
| 			self.report_resuming_byte(resume_len) | ||||
| 			request.add_header('Range','bytes=%d-' % resume_len) | ||||
| 			open_mode = 'ab' | ||||
|  | ||||
| 		# Establish connection | ||||
| 		try: | ||||
| 			data = urllib2.urlopen(request) | ||||
| 		except (urllib2.HTTPError, ), err: | ||||
| 			if err.code != 416: #  416 is 'Requested range not satisfiable' | ||||
| 				raise | ||||
| 			# Unable to resume | ||||
| 			data = urllib2.urlopen(basic_request) | ||||
| 			content_length = data.info()['Content-Length'] | ||||
|  | ||||
| 			if content_length is not None and long(content_length) == resume_len: | ||||
| 				# Because the file had already been fully downloaded | ||||
| 				self.report_file_already_downloaded(filename) | ||||
| 				return True | ||||
| 			else: | ||||
| 				# Because the server didn't let us | ||||
| 				self.report_unable_to_resume() | ||||
| 				open_mode = 'wb' | ||||
|  | ||||
| 		data_len = data.info().get('Content-length', None) | ||||
| 		data_len_str = self.format_bytes(data_len) | ||||
| 		byte_counter = 0 | ||||
| 		block_size = 1024 | ||||
| 		start = time.time() | ||||
| 		while True: | ||||
| 			# Progress message | ||||
| 			percent_str = self.calc_percent(byte_counter, data_len) | ||||
| 			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) | ||||
| 			speed_str = self.calc_speed(start, time.time(), byte_counter) | ||||
| 			self.report_progress(percent_str, data_len_str, speed_str, eta_str) | ||||
|  | ||||
| 			# Download and write | ||||
| 			before = time.time() | ||||
| 			data_block = data.read(block_size) | ||||
| @@ -389,15 +472,31 @@ class FileDownloader(object): | ||||
| 			if data_block_len == 0: | ||||
| 				break | ||||
| 			byte_counter += data_block_len | ||||
|  | ||||
| 			# Open file just in time | ||||
| 			if stream is None: | ||||
| 				try: | ||||
| 					stream = open(filename, open_mode) | ||||
| 					self.report_destination(filename) | ||||
| 				except (OSError, IOError), err: | ||||
| 					self.trouble('ERROR: unable to open for writing: %s' % str(err)) | ||||
| 					return False | ||||
| 			stream.write(data_block) | ||||
| 			block_size = self.best_block_size(after - before, data_block_len) | ||||
|  | ||||
| 			# Progress message | ||||
| 			percent_str = self.calc_percent(byte_counter, data_len) | ||||
| 			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter) | ||||
| 			speed_str = self.calc_speed(start, time.time(), byte_counter) | ||||
| 			self.report_progress(percent_str, data_len_str, speed_str, eta_str) | ||||
|  | ||||
| 			# Apply rate limit | ||||
| 			self.slow_down(start, byte_counter) | ||||
|  | ||||
| 		self.report_finish() | ||||
| 		if data_len is not None and str(byte_counter) != data_len: | ||||
| 			raise ContentTooShortError(byte_counter, long(data_len)) | ||||
| 		return True | ||||
|  | ||||
| class InfoExtractor(object): | ||||
| 	"""Information Extractor class. | ||||
| @@ -468,12 +567,13 @@ class YoutubeIE(InfoExtractor): | ||||
| 	_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' | ||||
| 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||
| 	_NETRC_MACHINE = 'youtube' | ||||
| 	_available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag | ||||
| 	_available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag | ||||
| 	_video_extensions = { | ||||
| 		'13': '3gp', | ||||
| 		'17': 'mp4', | ||||
| 		'18': 'mp4', | ||||
| 		'22': 'mp4', | ||||
| 		'37': 'mp4', | ||||
| 	} | ||||
|  | ||||
| 	@staticmethod | ||||
| @@ -515,22 +615,22 @@ class YoutubeIE(InfoExtractor): | ||||
| 		"""Report attempt to confirm age.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] Confirming age') | ||||
| 	 | ||||
| 	def report_webpage_download(self, video_id): | ||||
| 		"""Report attempt to download webpage.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) | ||||
| 	def report_video_info_webpage_download(self, video_id): | ||||
| 		"""Report attempt to download video info webpage.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id) | ||||
| 	 | ||||
| 	def report_information_extraction(self, video_id): | ||||
| 		"""Report attempt to extract video information.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) | ||||
| 	 | ||||
| 	def report_video_url(self, video_id, video_real_url): | ||||
| 		"""Report extracted video URL.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) | ||||
| 	 | ||||
| 	def report_unavailable_format(self, video_id, format): | ||||
| 		"""Report extracted video URL.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) | ||||
| 	 | ||||
| 	def report_rtmp_download(self): | ||||
| 		"""Indicate the download will use the RTMP protocol.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] RTMP download detected') | ||||
| 	 | ||||
| 	def _real_initialize(self): | ||||
| 		if self._downloader is None: | ||||
| 			return | ||||
| @@ -623,42 +723,52 @@ class YoutubeIE(InfoExtractor): | ||||
| 			# Extension | ||||
| 			video_extension = self._video_extensions.get(format_param, 'flv') | ||||
|  | ||||
| 			# Normalize URL, including format | ||||
| 			normalized_url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id | ||||
| 			if format_param is not None: | ||||
| 				normalized_url = '%s&fmt=%s' % (normalized_url, format_param) | ||||
| 			request = urllib2.Request(normalized_url, None, std_headers) | ||||
| 			# Get video info | ||||
| 			video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id | ||||
| 			request = urllib2.Request(video_info_url, None, std_headers) | ||||
| 			try: | ||||
| 				self.report_webpage_download(video_id) | ||||
| 				video_webpage = urllib2.urlopen(request).read() | ||||
| 				self.report_video_info_webpage_download(video_id) | ||||
| 				video_info_webpage = urllib2.urlopen(request).read() | ||||
| 				video_info = urlparse.parse_qs(video_info_webpage) | ||||
| 			except (urllib2.URLError, httplib.HTTPException, socket.error), err: | ||||
| 				self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) | ||||
| 				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) | ||||
| 				return | ||||
| 			self.report_information_extraction(video_id) | ||||
|  | ||||
| 			# "t" param | ||||
| 			mobj = re.search(r', "t": "([^"]+)"', video_webpage) | ||||
| 			if mobj is None: | ||||
| 				self._downloader.trouble(u'ERROR: unable to extract "t" parameter') | ||||
| 			if 'token' not in video_info: | ||||
| 				# Attempt to see if YouTube has issued an error message | ||||
| 				if 'reason' not in video_info: | ||||
| 					self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason') | ||||
| 					stream = open('reportme-ydl-%s.dat' % time.time(), 'wb') | ||||
| 					stream.write(video_info_webpage) | ||||
| 					stream.close() | ||||
| 				else: | ||||
| 					reason = urllib.unquote_plus(video_info['reason'][0]) | ||||
| 					self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8')) | ||||
| 				return | ||||
| 			video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&el=detailpage&ps=' % (video_id, mobj.group(1)) | ||||
| 			token = urllib.unquote_plus(video_info['token'][0]) | ||||
| 			video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token) | ||||
| 			if format_param is not None: | ||||
| 				video_real_url = '%s&fmt=%s' % (video_real_url, format_param) | ||||
| 			self.report_video_url(video_id, video_real_url) | ||||
|  | ||||
| 			# Check possible RTMP download | ||||
| 			if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | ||||
| 				self.report_rtmp_download() | ||||
| 				video_real_url = video_info['conn'][0] | ||||
|  | ||||
| 			# uploader | ||||
| 			mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage) | ||||
| 			if mobj is None: | ||||
| 			if 'author' not in video_info: | ||||
| 				self._downloader.trouble(u'ERROR: unable to extract uploader nickname') | ||||
| 				return | ||||
| 			video_uploader = mobj.group(1) | ||||
| 			video_uploader = urllib.unquote_plus(video_info['author'][0]) | ||||
|  | ||||
| 			# title | ||||
| 			mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage) | ||||
| 			if mobj is None: | ||||
| 			if 'title' not in video_info: | ||||
| 				self._downloader.trouble(u'ERROR: unable to extract video title') | ||||
| 				return | ||||
| 			video_title = mobj.group(1).decode('utf-8') | ||||
| 			video_title = urllib.unquote_plus(video_info['title'][0]) | ||||
| 			video_title = video_title.decode('utf-8') | ||||
| 			video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title) | ||||
| 			video_title = video_title.replace(os.sep, u'%') | ||||
|  | ||||
| @@ -779,19 +889,21 @@ class MetacafeIE(InfoExtractor): | ||||
|  | ||||
| 		# Extract URL, uploader and title from webpage | ||||
| 		self.report_extraction(video_id) | ||||
| 		mobj = re.search(r'(?m)&mediaURL=(http.*?\.flv)', webpage) | ||||
| 		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract media URL') | ||||
| 			return | ||||
| 		mediaURL = urllib.unquote(mobj.group(1)) | ||||
|  | ||||
| 		mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract gdaKey') | ||||
| 			return | ||||
| 		gdaKey = mobj.group(1) | ||||
| 		#mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) | ||||
| 		#if mobj is None: | ||||
| 		#	self._downloader.trouble(u'ERROR: unable to extract gdaKey') | ||||
| 		#	return | ||||
| 		#gdaKey = mobj.group(1) | ||||
| 		# | ||||
| 		#video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) | ||||
|  | ||||
| 		video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) | ||||
| 		video_url = mediaURL | ||||
|  | ||||
| 		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage) | ||||
| 		if mobj is None: | ||||
| @@ -799,7 +911,7 @@ class MetacafeIE(InfoExtractor): | ||||
| 			return | ||||
| 		video_title = mobj.group(1).decode('utf-8') | ||||
|  | ||||
| 		mobj = re.search(r'(?ms)<li id="ChnlUsr">.*?Submitter:.*?<a .*?>(.*?)<', webpage) | ||||
| 		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract uploader nickname') | ||||
| 			return | ||||
| @@ -824,7 +936,7 @@ class YoutubeSearchIE(InfoExtractor): | ||||
| 	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' | ||||
| 	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' | ||||
| 	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"' | ||||
| 	_MORE_PAGES_INDICATOR = r'>Next</a>' | ||||
| 	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' | ||||
| 	_youtube_ie = None | ||||
| 	_max_youtube_results = 1000 | ||||
|  | ||||
| @@ -859,7 +971,7 @@ class YoutubeSearchIE(InfoExtractor): | ||||
| 			return | ||||
| 		else: | ||||
| 			try: | ||||
| 				n = int(prefix) | ||||
| 				n = long(prefix) | ||||
| 				if n <= 0: | ||||
| 					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) | ||||
| 					return | ||||
| @@ -868,7 +980,7 @@ class YoutubeSearchIE(InfoExtractor): | ||||
| 					n = self._max_youtube_results | ||||
| 				self._download_n_results(query, n) | ||||
| 				return | ||||
| 			except ValueError: # parsing prefix as int fails | ||||
| 			except ValueError: # parsing prefix as integer fails | ||||
| 				self._download_n_results(query, 1) | ||||
| 				return | ||||
|  | ||||
| @@ -901,7 +1013,7 @@ class YoutubeSearchIE(InfoExtractor): | ||||
| 							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) | ||||
| 						return | ||||
|  | ||||
| 			if self._MORE_PAGES_INDICATOR not in page: | ||||
| 			if re.search(self._MORE_PAGES_INDICATOR, page) is None: | ||||
| 				for id in video_ids: | ||||
| 					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) | ||||
| 				return | ||||
| @@ -911,10 +1023,10 @@ class YoutubeSearchIE(InfoExtractor): | ||||
| class YoutubePlaylistIE(InfoExtractor): | ||||
| 	"""Information Extractor for YouTube playlists.""" | ||||
|  | ||||
| 	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)' | ||||
| 	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*' | ||||
| 	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' | ||||
| 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&' | ||||
| 	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' | ||||
| 	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s' | ||||
| 	_youtube_ie = None | ||||
|  | ||||
| 	def __init__(self, youtube_ie, downloader=None): | ||||
| @@ -960,7 +1072,7 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
| 					ids_in_page.append(mobj.group(1)) | ||||
| 			video_ids.extend(ids_in_page) | ||||
|  | ||||
| 			if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page: | ||||
| 			if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page: | ||||
| 				break | ||||
| 			pagenum = pagenum + 1 | ||||
|  | ||||
| @@ -968,6 +1080,61 @@ class YoutubePlaylistIE(InfoExtractor): | ||||
| 			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) | ||||
| 		return | ||||
|  | ||||
| class YoutubeUserIE(InfoExtractor): | ||||
| 	"""Information Extractor for YouTube users.""" | ||||
|  | ||||
| 	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' | ||||
| 	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' | ||||
| 	_VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. | ||||
| 	_youtube_ie = None | ||||
|  | ||||
| 	def __init__(self, youtube_ie, downloader=None): | ||||
| 		InfoExtractor.__init__(self, downloader) | ||||
| 		self._youtube_ie = youtube_ie | ||||
| 	 | ||||
| 	@staticmethod | ||||
| 	def suitable(url): | ||||
| 		return (re.match(YoutubeUserIE._VALID_URL, url) is not None) | ||||
|  | ||||
| 	def report_download_page(self, username): | ||||
| 		"""Report attempt to download user page.""" | ||||
| 		self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) | ||||
|  | ||||
| 	def _real_initialize(self): | ||||
| 		self._youtube_ie.initialize() | ||||
| 	 | ||||
| 	def _real_extract(self, url): | ||||
| 		# Extract username | ||||
| 		mobj = re.match(self._VALID_URL, url) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: invalid url: %s' % url) | ||||
| 			return | ||||
|  | ||||
| 		# Download user page | ||||
| 		username = mobj.group(1) | ||||
| 		video_ids = [] | ||||
| 		pagenum = 1 | ||||
|  | ||||
| 		self.report_download_page(username) | ||||
| 		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) | ||||
| 		try: | ||||
| 			page = urllib2.urlopen(request).read() | ||||
| 		except (urllib2.URLError, httplib.HTTPException, socket.error), err: | ||||
| 			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) | ||||
| 			return | ||||
|  | ||||
| 		# Extract video identifiers | ||||
| 		ids_in_page = [] | ||||
|  | ||||
| 		for mobj in re.finditer(self._VIDEO_INDICATOR, page): | ||||
| 			if mobj.group(1) not in ids_in_page: | ||||
| 				ids_in_page.append(mobj.group(1)) | ||||
| 		video_ids.extend(ids_in_page) | ||||
|  | ||||
| 		for id in video_ids: | ||||
| 			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) | ||||
| 		return | ||||
|  | ||||
| class PostProcessor(object): | ||||
| 	"""Post Processor class. | ||||
|  | ||||
| @@ -1021,6 +1188,22 @@ if __name__ == '__main__': | ||||
| 		import getpass | ||||
| 		import optparse | ||||
|  | ||||
| 		# Function to update the program file with the latest version from bitbucket.org | ||||
| 		def update_self(downloader, filename): | ||||
| 			# Note: downloader only used for options | ||||
| 			if not os.access (filename, os.W_OK): | ||||
| 				sys.exit('ERROR: no write permissions on %s' % filename) | ||||
|  | ||||
| 			downloader.to_stdout('Updating to latest stable version...') | ||||
| 			latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' | ||||
| 			latest_version = urllib.urlopen(latest_url).read().strip() | ||||
| 			prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version | ||||
| 			newcontent = urllib.urlopen(prog_url).read() | ||||
| 			stream = open(filename, 'w') | ||||
| 			stream.write(newcontent) | ||||
| 			stream.close() | ||||
| 			downloader.to_stdout('Updated to version %s' % latest_version) | ||||
|  | ||||
| 		# General configuration | ||||
| 		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) | ||||
| 		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) | ||||
| @@ -1029,7 +1212,7 @@ if __name__ == '__main__': | ||||
| 		# Parse command line | ||||
| 		parser = optparse.OptionParser( | ||||
| 			usage='Usage: %prog [options] url...', | ||||
| 			version='2009.05.25', | ||||
| 			version='2010.01.05', | ||||
| 			conflict_handler='resolve', | ||||
| 		) | ||||
|  | ||||
| @@ -1037,6 +1220,8 @@ if __name__ == '__main__': | ||||
| 				action='help', help='print this help text and exit') | ||||
| 		parser.add_option('-v', '--version', | ||||
| 				action='version', help='print program version and exit') | ||||
| 		parser.add_option('-U', '--update', | ||||
| 				action='store_true', dest='update_self', help='update this program to latest stable version') | ||||
| 		parser.add_option('-i', '--ignore-errors', | ||||
| 				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) | ||||
| 		parser.add_option('-r', '--rate-limit', | ||||
| @@ -1053,7 +1238,7 @@ if __name__ == '__main__': | ||||
|  | ||||
| 		video_format = optparse.OptionGroup(parser, 'Video Format Options') | ||||
| 		video_format.add_option('-f', '--format', | ||||
| 				action='append', dest='format', metavar='FMT', help='video format code') | ||||
| 				action='store', dest='format', metavar='FMT', help='video format code') | ||||
| 		video_format.add_option('-b', '--best-quality', | ||||
| 				action='store_const', dest='format', help='download the best quality video possible', const='0') | ||||
| 		video_format.add_option('-m', '--mobile-version', | ||||
| @@ -1084,6 +1269,8 @@ if __name__ == '__main__': | ||||
| 				dest='batchfile', metavar='F', help='file containing URLs to download') | ||||
| 		filesystem.add_option('-w', '--no-overwrites', | ||||
| 				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) | ||||
| 		filesystem.add_option('-c', '--continue', | ||||
| 				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) | ||||
| 		parser.add_option_group(filesystem) | ||||
|  | ||||
| 		(opts, args) = parser.parse_args() | ||||
| @@ -1100,8 +1287,6 @@ if __name__ == '__main__': | ||||
| 		all_urls = batchurls + args | ||||
|  | ||||
| 		# Conflicting, missing and erroneous options | ||||
| 		if len(all_urls) < 1: | ||||
| 			parser.error(u'you must provide at least one URL') | ||||
| 		if opts.usenetrc and (opts.username is not None or opts.password is not None): | ||||
| 			parser.error(u'using .netrc conflicts with giving username/password') | ||||
| 		if opts.password is not None and opts.username is None: | ||||
| @@ -1122,6 +1307,7 @@ if __name__ == '__main__': | ||||
| 		youtube_ie = YoutubeIE() | ||||
| 		metacafe_ie = MetacafeIE(youtube_ie) | ||||
| 		youtube_pl_ie = YoutubePlaylistIE(youtube_ie) | ||||
| 		youtube_user_ie = YoutubeUserIE(youtube_ie) | ||||
| 		youtube_search_ie = YoutubeSearchIE(youtube_ie) | ||||
|  | ||||
| 		# File downloader | ||||
| @@ -1134,18 +1320,31 @@ if __name__ == '__main__': | ||||
| 			'forcetitle': opts.gettitle, | ||||
| 			'simulate': (opts.simulate or opts.geturl or opts.gettitle), | ||||
| 			'format': opts.format, | ||||
| 			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding())) | ||||
| 			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) | ||||
| 				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') | ||||
| 				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') | ||||
| 				or u'%(id)s.%(ext)s'), | ||||
| 			'ignoreerrors': opts.ignoreerrors, | ||||
| 			'ratelimit': opts.ratelimit, | ||||
| 			'nooverwrites': opts.nooverwrites, | ||||
| 			'continuedl': opts.continue_dl, | ||||
| 			}) | ||||
| 		fd.add_info_extractor(youtube_search_ie) | ||||
| 		fd.add_info_extractor(youtube_pl_ie) | ||||
| 		fd.add_info_extractor(youtube_user_ie) | ||||
| 		fd.add_info_extractor(metacafe_ie) | ||||
| 		fd.add_info_extractor(youtube_ie) | ||||
|  | ||||
| 		# Update version | ||||
| 		if opts.update_self: | ||||
| 			update_self(fd, sys.argv[0]) | ||||
|  | ||||
| 		# Maybe do nothing | ||||
| 		if len(all_urls) < 1: | ||||
| 			if not opts.update_self: | ||||
| 				parser.error(u'you must provide at least one URL') | ||||
| 			else: | ||||
| 				sys.exit() | ||||
| 		retcode = fd.download(all_urls) | ||||
| 		sys.exit(retcode) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user