mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-09-30 13:28:36 +09:00
Compare commits
58 Commits
2013.12.17
...
2013.12.23
Author | SHA1 | Date | |
---|---|---|---|
![]() |
8958b6916c | ||
![]() |
9fc3bef87a | ||
![]() |
d80044c235 | ||
![]() |
bc2103f3bf | ||
![]() |
f82b18efc1 | ||
![]() |
504c668d3b | ||
![]() |
466617f539 | ||
![]() |
196938835a | ||
![]() |
a94e129a65 | ||
![]() |
5d681e960d | ||
![]() |
c7b487d96b | ||
![]() |
7dbf5ae587 | ||
![]() |
8d0bdeba18 | ||
![]() |
1b969041d7 | ||
![]() |
e302f9ce32 | ||
![]() |
5a94982abe | ||
![]() |
7115ca84aa | ||
![]() |
04ff34ab89 | ||
![]() |
bbafbe20c2 | ||
![]() |
c4d55a33fc | ||
![]() |
147e4aece0 | ||
![]() |
bd1488ae64 | ||
![]() |
79fed2a4df | ||
![]() |
304cbe981e | ||
![]() |
3fefbf50e3 | ||
![]() |
f65c1d2be0 | ||
![]() |
aa94a6d315 | ||
![]() |
768df74538 | ||
![]() |
1f9da9049b | ||
![]() |
c0d0b01f0e | ||
![]() |
7c86a5b864 | ||
![]() |
97e302a419 | ||
![]() |
71507a11c8 | ||
![]() |
a51e37af62 | ||
![]() |
1fb8f09273 | ||
![]() |
6c6db72ed4 | ||
![]() |
0cc83dc54b | ||
![]() |
5ce54a8205 | ||
![]() |
8c21b7c647 | ||
![]() |
77aa6b329d | ||
![]() |
62d68c43ed | ||
![]() |
bfaae0a768 | ||
![]() |
e56f22ae20 | ||
![]() |
dbd1988ed9 | ||
![]() |
4ea3be0a5c | ||
![]() |
3e78514568 | ||
![]() |
e029b8bd43 | ||
![]() |
f5567e401c | ||
![]() |
9b8aaeed85 | ||
![]() |
6086d121cb | ||
![]() |
7de6e075b4 | ||
![]() |
946135aa2a | ||
![]() |
42393ce234 | ||
![]() |
d6c7a367e8 | ||
![]() |
cecaaf3f58 | ||
![]() |
3bc2ddccc8 | ||
![]() |
8ab470f1b2 | ||
![]() |
c8434e8316 |
@@ -39,7 +39,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
/youtube-dl .
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--bidi-workaround Work around terminals that lack bidirectional
|
||||
text support. Requires fribidi executable in PATH
|
||||
text support. Requires bidiv or fribidi
|
||||
executable in PATH
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
|
2
setup.py
2
setup.py
@@ -71,7 +71,7 @@ setup(
|
||||
author_email='ytdl@yt-dl.org',
|
||||
maintainer='Philipp Hagemeister',
|
||||
maintainer_email='phihag@phihag.de',
|
||||
packages=['youtube_dl', 'youtube_dl.extractor'],
|
||||
packages=['youtube_dl', 'youtube_dl.extractor', 'youtube_dl.downloader'],
|
||||
|
||||
# Provokes warning on most systems (why?!)
|
||||
# test_suite = 'nose.collector',
|
||||
|
@@ -90,7 +90,7 @@ def generator(test_case):
|
||||
def _hook(status):
|
||||
if status['status'] == 'finished':
|
||||
finished_hook_called.add(status['filename'])
|
||||
ydl.fd.add_progress_hook(_hook)
|
||||
ydl.add_downloader_progress_hook(_hook)
|
||||
|
||||
def get_tc_filename(tc):
|
||||
return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
|
||||
|
@@ -27,7 +27,8 @@ from youtube_dl.extractor import (
|
||||
BambuserChannelIE,
|
||||
BandcampAlbumIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE
|
||||
SmotriUserIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
|
||||
|
||||
@@ -168,6 +169,24 @@ class TestPlaylists(unittest.TestCase):
|
||||
self.assertEqual(result['title'], u'Building Dynamic Websites')
|
||||
self.assertEqual(result['description'], u"Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
|
||||
self.assertEqual(len(result['entries']), 10)
|
||||
|
||||
def test_ivi_compilation(self):
|
||||
dl = FakeYDL()
|
||||
ie = IviCompilationIE(dl)
|
||||
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], u'dezhurnyi_angel')
|
||||
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012)')
|
||||
self.assertTrue(len(result['entries']) >= 36)
|
||||
|
||||
def test_ivi_compilation_season(self):
|
||||
dl = FakeYDL()
|
||||
ie = IviCompilationIE(dl)
|
||||
result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season2')
|
||||
self.assertIsPlaylist(result)
|
||||
self.assertEqual(result['id'], u'dezhurnyi_angel/season2')
|
||||
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
|
||||
self.assertTrue(len(result['entries']) >= 20)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -188,6 +188,9 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz')
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz')
|
||||
self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz')
|
||||
self.assertEqual(
|
||||
url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||
u'trailer.mp4')
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -1,724 +1,12 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .utils import (
|
||||
compat_urllib_error,
|
||||
compat_urllib_request,
|
||||
ContentTooShortError,
|
||||
determine_ext,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
sanitize_open,
|
||||
timeconvert,
|
||||
)
|
||||
|
||||
|
||||
class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
File downloader objects are the ones responsible of downloading the
|
||||
actual video file and writing it to disk.
|
||||
|
||||
File downloaders accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead.
|
||||
|
||||
Available options:
|
||||
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
"""
|
||||
|
||||
params = None
|
||||
|
||||
def __init__(self, ydl, params):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
(hours, mins) = divmod(mins, 60)
|
||||
if hours > 99:
|
||||
return '--:--:--'
|
||||
if hours == 0:
|
||||
return '%02d:%02d' % (mins, secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (hours, mins, secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_percent(byte_counter, data_len):
|
||||
if data_len is None:
|
||||
return None
|
||||
return float(byte_counter) / float(data_len) * 100.0
|
||||
|
||||
@staticmethod
|
||||
def format_percent(percent):
|
||||
if percent is None:
|
||||
return '---.-%'
|
||||
return '%6s' % ('%3.1f%%' % percent)
|
||||
|
||||
@staticmethod
|
||||
def calc_eta(start, now, total, current):
|
||||
if total is None:
|
||||
return None
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
rate = float(current) / dif
|
||||
return int((float(total) - float(current)) / rate)
|
||||
|
||||
@staticmethod
|
||||
def format_eta(eta):
|
||||
if eta is None:
|
||||
return '--:--'
|
||||
return FileDownloader.format_seconds(eta)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
dif = now - start
|
||||
if bytes == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
return float(bytes) / dif
|
||||
|
||||
@staticmethod
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
|
||||
if elapsed_time < 0.001:
|
||||
return int(new_max)
|
||||
rate = bytes / elapsed_time
|
||||
if rate > new_max:
|
||||
return int(new_max)
|
||||
if rate < new_min:
|
||||
return int(new_min)
|
||||
return int(rate)
|
||||
|
||||
@staticmethod
|
||||
def parse_bytes(bytestr):
|
||||
"""Parse a string indicating a byte quantity into an integer."""
|
||||
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
|
||||
if matchobj is None:
|
||||
return None
|
||||
number = float(matchobj.group(1))
|
||||
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_screen(*args, **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_console_title(self, message):
|
||||
self.ydl.to_console_title(message)
|
||||
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
|
||||
def report_warning(self, *args, **kargs):
|
||||
self.ydl.report_warning(*args, **kargs)
|
||||
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == u'-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
return filename
|
||||
return filename + u'.part'
|
||||
|
||||
def undo_temp_name(self, filename):
|
||||
if filename.endswith(u'.part'):
|
||||
return filename[:-len(u'.part')]
|
||||
return filename
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError):
|
||||
self.report_error(u'unable to rename file')
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
return
|
||||
filetime = timeconvert(timestr)
|
||||
if filetime is None:
|
||||
return filetime
|
||||
# Ignore obviously invalid dates
|
||||
if filetime == 0:
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
def report_destination(self, filename):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = u'[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = u'\r'
|
||||
else:
|
||||
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title(u'youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
|
||||
msg = (u'%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
downloaded_str = format_bytes(downloaded_data_len)
|
||||
speed_str = self.format_speed(speed)
|
||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
self._report_progress_status(
|
||||
(u'100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen(u'[download] Unable to resume')
|
||||
|
||||
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'speed': speed,
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
return proc.returncode
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
# Check for rtmpdump first
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_with_mplayer(self, filename, url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
||||
|
||||
def _download_m3u8_with_ffmpeg(self, filename, url):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
try:
|
||||
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
break
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'ffmpeg exited with code %d' % retval)
|
||||
return False
|
||||
# Legacy file for backwards compatibility, use youtube_dl.downloader instead!
|
||||
from .downloader import FileDownloader as RealFileDownloader
|
||||
from .downloader import get_suitable_downloader
|
||||
|
||||
|
||||
# This class reproduces the old behaviour of FileDownloader
|
||||
class FileDownloader(RealFileDownloader):
|
||||
def _do_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
|
||||
# Check file already present
|
||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
return True
|
||||
|
||||
# Attempt to download using rtmpdump
|
||||
if url.startswith('rtmp'):
|
||||
return self._download_with_rtmpdump(filename, url,
|
||||
info_dict.get('player_url', None),
|
||||
info_dict.get('page_url', None),
|
||||
info_dict.get('play_path', None),
|
||||
info_dict.get('tc_url', None),
|
||||
info_dict.get('rtmp_live', False),
|
||||
info_dict.get('rtmp_conn', None))
|
||||
|
||||
# Attempt to download using mplayer
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return self._download_with_mplayer(filename, url)
|
||||
|
||||
# m3u8 manifest are downloaded with ffmpeg
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return self._download_m3u8_with_ffmpeg(filename, url)
|
||||
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
if self.params.get('test', False):
|
||||
request.add_header('Range','bytes=0-10240')
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', False):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range','bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
while count <= retries:
|
||||
# Establish connection
|
||||
try:
|
||||
if count == 0 and 'urlhandle' in info_dict:
|
||||
data = info_dict['urlhandle']
|
||||
data = compat_urllib_request.urlopen(request)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
elif err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = compat_urllib_request.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None and
|
||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
# changing the file size slightly and causing problems for some users. So
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(filename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
open_mode = 'wb'
|
||||
break
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
while True:
|
||||
# Download and write
|
||||
before = time.time()
|
||||
data_block = data.read(block_size)
|
||||
after = time.time()
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# Open file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
assert stream is not None
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError) as err:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
|
||||
return True
|
||||
|
||||
def _hook_progress(self, status):
|
||||
real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
""" ph gets called on download progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
It can also have some of the following entries:
|
||||
|
||||
* downloaded_bytes: Bytes on disks
|
||||
* total_bytes: Total bytes, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if unknown
|
||||
|
||||
Hooks are guaranteed to be called at least once (with status "finished")
|
||||
if the download is successful.
|
||||
"""
|
||||
self._progress_hooks.append(ph)
|
||||
real_fd.add_progress_hook(ph)
|
||||
return real_fd.download(filename, info_dict)
|
||||
|
@@ -53,7 +53,7 @@ from .utils import (
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
from .extractor import get_info_extractor, gen_extractors
|
||||
from .FileDownloader import FileDownloader
|
||||
from .downloader import get_suitable_downloader
|
||||
from .version import __version__
|
||||
|
||||
|
||||
@@ -167,7 +167,7 @@ class YoutubeDL(object):
|
||||
self._ies = []
|
||||
self._ies_instances = {}
|
||||
self._pps = []
|
||||
self._progress_hooks = []
|
||||
self._fd_progress_hooks = []
|
||||
self._download_retcode = 0
|
||||
self._num_downloads = 0
|
||||
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
|
||||
@@ -183,12 +183,18 @@ class YoutubeDL(object):
|
||||
width_args = []
|
||||
else:
|
||||
width_args = ['-w', str(width)]
|
||||
self._fribidi = subprocess.Popen(
|
||||
['fribidi', '-c', 'UTF-8'] + width_args,
|
||||
sp_kwargs = dict(
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=slave,
|
||||
stderr=self._err_file)
|
||||
self._fribidi_channel = os.fdopen(master, 'rb')
|
||||
try:
|
||||
self._output_process = subprocess.Popen(
|
||||
['bidiv'] + width_args, **sp_kwargs
|
||||
)
|
||||
except OSError:
|
||||
self._output_process = subprocess.Popen(
|
||||
['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
|
||||
self._output_channel = os.fdopen(master, 'rb')
|
||||
except OSError as ose:
|
||||
if ose.errno == 2:
|
||||
self.report_warning(u'Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
|
||||
@@ -205,8 +211,6 @@ class YoutubeDL(object):
|
||||
u'Set the LC_ALL environment variable to fix this.')
|
||||
self.params['restrictfilenames'] = True
|
||||
|
||||
self.fd = FileDownloader(self, self.params)
|
||||
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
@@ -242,15 +246,20 @@ class YoutubeDL(object):
|
||||
self._pps.append(pp)
|
||||
pp.set_downloader(self)
|
||||
|
||||
def add_downloader_progress_hook(self, ph):
|
||||
"""Add the progress hook to the file downloader"""
|
||||
self._fd_progress_hooks.append(ph)
|
||||
|
||||
def _bidi_workaround(self, message):
|
||||
if not hasattr(self, '_fribidi_channel'):
|
||||
if not hasattr(self, '_output_channel'):
|
||||
return message
|
||||
|
||||
assert hasattr(self, '_output_process')
|
||||
assert type(message) == type(u'')
|
||||
line_count = message.count(u'\n') + 1
|
||||
self._fribidi.stdin.write((message + u'\n').encode('utf-8'))
|
||||
self._fribidi.stdin.flush()
|
||||
res = u''.join(self._fribidi_channel.readline().decode('utf-8')
|
||||
self._output_process.stdin.write((message + u'\n').encode('utf-8'))
|
||||
self._output_process.stdin.flush()
|
||||
res = u''.join(self._output_channel.readline().decode('utf-8')
|
||||
for _ in range(line_count))
|
||||
return res[:-len(u'\n')]
|
||||
|
||||
@@ -636,7 +645,7 @@ class YoutubeDL(object):
|
||||
info_dict['playlist_index'] = None
|
||||
|
||||
# This extractors handle format selection themselves
|
||||
if info_dict['extractor'] in [u'youtube', u'Youku']:
|
||||
if info_dict['extractor'] in [u'Youku']:
|
||||
if download:
|
||||
self.process_info(info_dict)
|
||||
return info_dict
|
||||
@@ -662,10 +671,6 @@ class YoutubeDL(object):
|
||||
if 'ext' not in format:
|
||||
format['ext'] = determine_ext(format['url'])
|
||||
|
||||
if self.params.get('listformats', None):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
|
||||
format_limit = self.params.get('format_limit', None)
|
||||
if format_limit:
|
||||
formats = list(takewhile_inclusive(
|
||||
@@ -678,9 +683,16 @@ class YoutubeDL(object):
|
||||
except ValueError:
|
||||
ext_ord = -1
|
||||
# We only compare the extension if they have the same height and width
|
||||
return (f.get('height'), f.get('width'), ext_ord)
|
||||
return (f.get('height') if f.get('height') is not None else -1,
|
||||
f.get('width') if f.get('width') is not None else -1,
|
||||
ext_ord)
|
||||
formats = sorted(formats, key=_free_formats_key)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
if self.params.get('listformats', None):
|
||||
self.list_formats(info_dict)
|
||||
return
|
||||
|
||||
req_format = self.params.get('format', 'best')
|
||||
if req_format is None:
|
||||
req_format = 'best'
|
||||
@@ -870,7 +882,10 @@ class YoutubeDL(object):
|
||||
success = True
|
||||
else:
|
||||
try:
|
||||
success = self.fd._do_download(filename, info_dict)
|
||||
fd = get_suitable_downloader(info_dict)(self, self.params)
|
||||
for ph in self._fd_progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
success = fd.download(filename, info_dict)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
self.report_error(u'unable to download video data: %s' % str(err))
|
||||
return
|
||||
|
@@ -56,7 +56,6 @@ from .utils import (
|
||||
compat_print,
|
||||
DateRange,
|
||||
decodeOption,
|
||||
determine_ext,
|
||||
get_term_width,
|
||||
DownloadError,
|
||||
get_cachedir,
|
||||
@@ -195,7 +194,7 @@ def parseOpts(overrideArguments=None):
|
||||
type=float, default=None, help=optparse.SUPPRESS_HELP)
|
||||
general.add_option(
|
||||
'--bidi-workaround', dest='bidi_workaround', action='store_true',
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires fribidi executable in PATH')
|
||||
help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||
|
||||
|
||||
selection.add_option(
|
||||
@@ -525,7 +524,6 @@ def _real_main(argv=None):
|
||||
for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
|
||||
compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
|
||||
matchedUrls = [url for url in all_urls if ie.suitable(url)]
|
||||
all_urls = [url for url in all_urls if url not in matchedUrls]
|
||||
for mu in matchedUrls:
|
||||
compat_print(u' ' + mu)
|
||||
sys.exit(0)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
|
||||
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
|
||||
|
||||
import base64
|
||||
from math import ceil
|
||||
@@ -32,6 +32,31 @@ def aes_ctr_decrypt(data, key, counter):
|
||||
|
||||
return decrypted_data
|
||||
|
||||
def aes_cbc_decrypt(data, key, iv):
|
||||
"""
|
||||
Decrypt with aes in CBC mode
|
||||
|
||||
@param {int[]} data cipher
|
||||
@param {int[]} key 16/24/32-Byte cipher key
|
||||
@param {int[]} iv 16-Byte IV
|
||||
@returns {int[]} decrypted data
|
||||
"""
|
||||
expanded_key = key_expansion(key)
|
||||
block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
|
||||
|
||||
decrypted_data=[]
|
||||
previous_cipher_block = iv
|
||||
for i in range(block_count):
|
||||
block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
|
||||
block += [0]*(BLOCK_SIZE_BYTES - len(block))
|
||||
|
||||
decrypted_block = aes_decrypt(block, expanded_key)
|
||||
decrypted_data += xor(decrypted_block, previous_cipher_block)
|
||||
previous_cipher_block = block
|
||||
decrypted_data = decrypted_data[:len(data)]
|
||||
|
||||
return decrypted_data
|
||||
|
||||
def key_expansion(data):
|
||||
"""
|
||||
Generate key schedule
|
||||
@@ -75,7 +100,7 @@ def aes_encrypt(data, expanded_key):
|
||||
@returns {int[]} 16-Byte cipher
|
||||
"""
|
||||
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
|
||||
|
||||
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
for i in range(1, rounds+1):
|
||||
data = sub_bytes(data)
|
||||
@@ -83,6 +108,26 @@ def aes_encrypt(data, expanded_key):
|
||||
if i != rounds:
|
||||
data = mix_columns(data)
|
||||
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
|
||||
def aes_decrypt(data, expanded_key):
|
||||
"""
|
||||
Decrypt one block with aes
|
||||
|
||||
@param {int[]} data 16-Byte cipher
|
||||
@param {int[]} expanded_key 176/208/240-Byte expanded key
|
||||
@returns {int[]} 16-Byte state
|
||||
"""
|
||||
rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
|
||||
|
||||
for i in range(rounds, 0, -1):
|
||||
data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
|
||||
if i != rounds:
|
||||
data = mix_columns_inv(data)
|
||||
data = shift_rows_inv(data)
|
||||
data = sub_bytes_inv(data)
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
|
||||
@@ -139,14 +184,69 @@ SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
|
||||
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
|
||||
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
|
||||
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
|
||||
MIX_COLUMN_MATRIX = ((2,3,1,1),
|
||||
(1,2,3,1),
|
||||
(1,1,2,3),
|
||||
(3,1,1,2))
|
||||
SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
|
||||
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
|
||||
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
|
||||
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
|
||||
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
|
||||
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
|
||||
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
|
||||
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
|
||||
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
|
||||
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
|
||||
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
|
||||
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
|
||||
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
|
||||
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
|
||||
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
|
||||
MIX_COLUMN_MATRIX = ((0x2,0x3,0x1,0x1),
|
||||
(0x1,0x2,0x3,0x1),
|
||||
(0x1,0x1,0x2,0x3),
|
||||
(0x3,0x1,0x1,0x2))
|
||||
MIX_COLUMN_MATRIX_INV = ((0xE,0xB,0xD,0x9),
|
||||
(0x9,0xE,0xB,0xD),
|
||||
(0xD,0x9,0xE,0xB),
|
||||
(0xB,0xD,0x9,0xE))
|
||||
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
|
||||
0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
|
||||
0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
|
||||
0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
|
||||
0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
|
||||
0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
|
||||
0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
|
||||
0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
|
||||
0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
|
||||
0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
|
||||
0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
|
||||
0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
|
||||
0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
|
||||
0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
|
||||
0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
|
||||
0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
|
||||
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
|
||||
0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
|
||||
0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
|
||||
0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
|
||||
0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
|
||||
0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
|
||||
0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
|
||||
0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
|
||||
0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
|
||||
0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
|
||||
0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
|
||||
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
|
||||
0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
|
||||
0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
|
||||
0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
|
||||
0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
|
||||
|
||||
def sub_bytes(data):
|
||||
return [SBOX[x] for x in data]
|
||||
|
||||
def sub_bytes_inv(data):
|
||||
return [SBOX_INV[x] for x in data]
|
||||
|
||||
def rotate(data):
|
||||
return data[1:] + [data[0]]
|
||||
|
||||
@@ -160,30 +260,31 @@ def key_schedule_core(data, rcon_iteration):
|
||||
def xor(data1, data2):
|
||||
return [x^y for x, y in zip(data1, data2)]
|
||||
|
||||
def mix_column(data):
|
||||
def rijndael_mul(a, b):
|
||||
if(a==0 or b==0):
|
||||
return 0
|
||||
return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
|
||||
|
||||
def mix_column(data, matrix):
|
||||
data_mixed = []
|
||||
for row in range(4):
|
||||
mixed = 0
|
||||
for column in range(4):
|
||||
addend = data[column]
|
||||
if MIX_COLUMN_MATRIX[row][column] in (2,3):
|
||||
addend <<= 1
|
||||
if addend > 0xff:
|
||||
addend &= 0xff
|
||||
addend ^= 0x1b
|
||||
if MIX_COLUMN_MATRIX[row][column] == 3:
|
||||
addend ^= data[column]
|
||||
mixed ^= addend & 0xff
|
||||
# xor is (+) and (-)
|
||||
mixed ^= rijndael_mul(data[column], matrix[row][column])
|
||||
data_mixed.append(mixed)
|
||||
return data_mixed
|
||||
|
||||
def mix_columns(data):
|
||||
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
|
||||
data_mixed = []
|
||||
for i in range(4):
|
||||
column = data[i*4 : (i+1)*4]
|
||||
data_mixed += mix_column(column)
|
||||
data_mixed += mix_column(column, matrix)
|
||||
return data_mixed
|
||||
|
||||
def mix_columns_inv(data):
|
||||
return mix_columns(data, MIX_COLUMN_MATRIX_INV)
|
||||
|
||||
def shift_rows(data):
|
||||
data_shifted = []
|
||||
for column in range(4):
|
||||
@@ -191,6 +292,13 @@ def shift_rows(data):
|
||||
data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
|
||||
return data_shifted
|
||||
|
||||
def shift_rows_inv(data):
|
||||
data_shifted = []
|
||||
for column in range(4):
|
||||
for row in range(4):
|
||||
data_shifted.append( data[((column - row) & 0b11) * 4 + row] )
|
||||
return data_shifted
|
||||
|
||||
def inc(data):
|
||||
data = data[:] # copy
|
||||
for i in range(len(data)-1,-1,-1):
|
||||
|
23
youtube_dl/downloader/__init__.py
Normal file
23
youtube_dl/downloader/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from .common import FileDownloader
|
||||
from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtmp import RtmpFD
|
||||
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
def get_suitable_downloader(info_dict):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
url = info_dict['url']
|
||||
|
||||
if url.startswith('rtmp'):
|
||||
return RtmpFD
|
||||
if determine_ext(url) == u'm3u8':
|
||||
return HlsFD
|
||||
if url.startswith('mms') or url.startswith('rtsp'):
|
||||
return MplayerFD
|
||||
else:
|
||||
return HttpFD
|
||||
|
321
youtube_dl/downloader/common.py
Normal file
321
youtube_dl/downloader/common.py
Normal file
@@ -0,0 +1,321 @@
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
timeconvert,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
class FileDownloader(object):
|
||||
"""File Downloader class.
|
||||
|
||||
File downloader objects are the ones responsible of downloading the
|
||||
actual video file and writing it to disk.
|
||||
|
||||
File downloaders accept a lot of parameters. In order not to saturate
|
||||
the object constructor with arguments, it receives a dictionary of
|
||||
options instead.
|
||||
|
||||
Available options:
|
||||
|
||||
verbose: Print additional info to stdout.
|
||||
quiet: Do not print messages to stdout.
|
||||
ratelimit: Download speed limit, in bytes/sec.
|
||||
retries: Number of times to retry for HTTP error 5xx
|
||||
buffersize: Size of download buffer in bytes.
|
||||
noresizebuffer: Do not automatically resize the download buffer.
|
||||
continuedl: Try to continue downloads if possible.
|
||||
noprogress: Do not print the progress bar.
|
||||
logtostderr: Log messages to stderr instead of stdout.
|
||||
consoletitle: Display progress in console window's titlebar.
|
||||
nopart: Do not use temporary .part files.
|
||||
updatetime: Use the Last-modified header to set output file timestamps.
|
||||
test: Download only first bytes to test the downloader.
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
|
||||
Subclasses of this one must re-define the real_download method.
|
||||
"""
|
||||
|
||||
params = None
|
||||
|
||||
def __init__(self, ydl, params):
|
||||
"""Create a FileDownloader object with the given options."""
|
||||
self.ydl = ydl
|
||||
self._progress_hooks = []
|
||||
self.params = params
|
||||
|
||||
@staticmethod
|
||||
def format_seconds(seconds):
|
||||
(mins, secs) = divmod(seconds, 60)
|
||||
(hours, mins) = divmod(mins, 60)
|
||||
if hours > 99:
|
||||
return '--:--:--'
|
||||
if hours == 0:
|
||||
return '%02d:%02d' % (mins, secs)
|
||||
else:
|
||||
return '%02d:%02d:%02d' % (hours, mins, secs)
|
||||
|
||||
@staticmethod
|
||||
def calc_percent(byte_counter, data_len):
|
||||
if data_len is None:
|
||||
return None
|
||||
return float(byte_counter) / float(data_len) * 100.0
|
||||
|
||||
@staticmethod
|
||||
def format_percent(percent):
|
||||
if percent is None:
|
||||
return '---.-%'
|
||||
return '%6s' % ('%3.1f%%' % percent)
|
||||
|
||||
@staticmethod
|
||||
def calc_eta(start, now, total, current):
|
||||
if total is None:
|
||||
return None
|
||||
dif = now - start
|
||||
if current == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
rate = float(current) / dif
|
||||
return int((float(total) - float(current)) / rate)
|
||||
|
||||
@staticmethod
|
||||
def format_eta(eta):
|
||||
if eta is None:
|
||||
return '--:--'
|
||||
return FileDownloader.format_seconds(eta)
|
||||
|
||||
@staticmethod
|
||||
def calc_speed(start, now, bytes):
|
||||
dif = now - start
|
||||
if bytes == 0 or dif < 0.001: # One millisecond
|
||||
return None
|
||||
return float(bytes) / dif
|
||||
|
||||
@staticmethod
|
||||
def format_speed(speed):
|
||||
if speed is None:
|
||||
return '%10s' % '---b/s'
|
||||
return '%10s' % ('%s/s' % format_bytes(speed))
|
||||
|
||||
@staticmethod
|
||||
def best_block_size(elapsed_time, bytes):
|
||||
new_min = max(bytes / 2.0, 1.0)
|
||||
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
|
||||
if elapsed_time < 0.001:
|
||||
return int(new_max)
|
||||
rate = bytes / elapsed_time
|
||||
if rate > new_max:
|
||||
return int(new_max)
|
||||
if rate < new_min:
|
||||
return int(new_min)
|
||||
return int(rate)
|
||||
|
||||
@staticmethod
|
||||
def parse_bytes(bytestr):
|
||||
"""Parse a string indicating a byte quantity into an integer."""
|
||||
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
|
||||
if matchobj is None:
|
||||
return None
|
||||
number = float(matchobj.group(1))
|
||||
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
|
||||
return int(round(number * multiplier))
|
||||
|
||||
def to_screen(self, *args, **kargs):
|
||||
self.ydl.to_screen(*args, **kargs)
|
||||
|
||||
def to_stderr(self, message):
|
||||
self.ydl.to_screen(message)
|
||||
|
||||
def to_console_title(self, message):
|
||||
self.ydl.to_console_title(message)
|
||||
|
||||
def trouble(self, *args, **kargs):
|
||||
self.ydl.trouble(*args, **kargs)
|
||||
|
||||
def report_warning(self, *args, **kargs):
|
||||
self.ydl.report_warning(*args, **kargs)
|
||||
|
||||
def report_error(self, *args, **kargs):
|
||||
self.ydl.report_error(*args, **kargs)
|
||||
|
||||
def slow_down(self, start_time, byte_counter):
|
||||
"""Sleep if the download speed is over the rate limit."""
|
||||
rate_limit = self.params.get('ratelimit', None)
|
||||
if rate_limit is None or byte_counter == 0:
|
||||
return
|
||||
now = time.time()
|
||||
elapsed = now - start_time
|
||||
if elapsed <= 0.0:
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == u'-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
return filename
|
||||
return filename + u'.part'
|
||||
|
||||
def undo_temp_name(self, filename):
|
||||
if filename.endswith(u'.part'):
|
||||
return filename[:-len(u'.part')]
|
||||
return filename
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
return
|
||||
os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
|
||||
except (IOError, OSError) as err:
|
||||
self.report_error(u'unable to rename file')
|
||||
|
||||
def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
return
|
||||
filetime = timeconvert(timestr)
|
||||
if filetime is None:
|
||||
return filetime
|
||||
# Ignore obviously invalid dates
|
||||
if filetime == 0:
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
def report_destination(self, filename):
|
||||
"""Report destination filename."""
|
||||
self.to_screen(u'[download] Destination: ' + filename)
|
||||
|
||||
def _report_progress_status(self, msg, is_last_line=False):
|
||||
fullmsg = u'[download] ' + msg
|
||||
if self.params.get('progress_with_newline', False):
|
||||
self.to_screen(fullmsg)
|
||||
else:
|
||||
if os.name == 'nt':
|
||||
prev_len = getattr(self, '_report_progress_prev_line_length',
|
||||
0)
|
||||
if prev_len > len(fullmsg):
|
||||
fullmsg += u' ' * (prev_len - len(fullmsg))
|
||||
self._report_progress_prev_line_length = len(fullmsg)
|
||||
clear_line = u'\r'
|
||||
else:
|
||||
clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
|
||||
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
|
||||
self.to_console_title(u'youtube-dl ' + msg)
|
||||
|
||||
def report_progress(self, percent, data_len_str, speed, eta):
|
||||
"""Report download progress."""
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
if eta is not None:
|
||||
eta_str = self.format_eta(eta)
|
||||
else:
|
||||
eta_str = 'Unknown ETA'
|
||||
if percent is not None:
|
||||
percent_str = self.format_percent(percent)
|
||||
else:
|
||||
percent_str = 'Unknown %'
|
||||
speed_str = self.format_speed(speed)
|
||||
|
||||
msg = (u'%s of %s at %s ETA %s' %
|
||||
(percent_str, data_len_str, speed_str, eta_str))
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
|
||||
if self.params.get('noprogress', False):
|
||||
return
|
||||
downloaded_str = format_bytes(downloaded_data_len)
|
||||
speed_str = self.format_speed(speed)
|
||||
elapsed_str = FileDownloader.format_seconds(elapsed)
|
||||
msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
|
||||
self._report_progress_status(msg)
|
||||
|
||||
def report_finish(self, data_len_str, tot_time):
|
||||
"""Report download finished."""
|
||||
if self.params.get('noprogress', False):
|
||||
self.to_screen(u'[download] Download completed')
|
||||
else:
|
||||
self._report_progress_status(
|
||||
(u'100%% of %s in %s' %
|
||||
(data_len_str, self.format_seconds(tot_time))),
|
||||
is_last_line=True)
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
|
||||
|
||||
def report_retry(self, count, retries):
|
||||
"""Report retry in case of HTTP error 5xx"""
|
||||
self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
|
||||
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen(u'[download] %s has already been downloaded' % file_name)
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen(u'[download] The file has already been downloaded')
|
||||
|
||||
def report_unable_to_resume(self):
|
||||
"""Report it was impossible to resume download."""
|
||||
self.to_screen(u'[download] Unable to resume')
|
||||
|
||||
def download(self, filename, info_dict):
|
||||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
url = info_dict['url']
|
||||
|
||||
# Check file already present
|
||||
if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
|
||||
self.report_file_already_downloaded(filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
})
|
||||
return True
|
||||
else:
|
||||
return self.real_download(filename, info_dict)
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
"""Real download process. Redefine in subclasses."""
|
||||
raise NotImplementedError(u'This method must be implemented by sublcasses')
|
||||
|
||||
def _hook_progress(self, status):
|
||||
for ph in self._progress_hooks:
|
||||
ph(status)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
""" ph gets called on download progress, with a dictionary with the entries
|
||||
* filename: The final filename
|
||||
* status: One of "downloading" and "finished"
|
||||
|
||||
It can also have some of the following entries:
|
||||
|
||||
* downloaded_bytes: Bytes on disks
|
||||
* total_bytes: Total bytes, None if unknown
|
||||
* tmpfilename: The filename we're currently writing to
|
||||
* eta: The estimated time in seconds, None if unknown
|
||||
* speed: The download speed in bytes/second, None if unknown
|
||||
|
||||
Hooks are guaranteed to be called at least once (with status "finished")
|
||||
if the download is successful.
|
||||
"""
|
||||
self._progress_hooks.append(ph)
|
||||
|
44
youtube_dl/downloader/hls.py
Normal file
44
youtube_dl/downloader/hls.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class HlsFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
|
||||
'-bsf:a', 'aac_adtstoasc', tmpfilename]
|
||||
|
||||
for program in ['avconv', 'ffmpeg']:
|
||||
try:
|
||||
subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
break
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
else:
|
||||
self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
|
||||
cmd = [program] + args
|
||||
|
||||
retval = subprocess.call(cmd)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'ffmpeg exited with code %d' % retval)
|
||||
return False
|
191
youtube_dl/downloader/http.py
Normal file
191
youtube_dl/downloader/http.py
Normal file
@@ -0,0 +1,191 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
compat_urllib_error,
|
||||
ContentTooShortError,
|
||||
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
tmpfilename = self.temp_name(filename)
|
||||
stream = None
|
||||
|
||||
# Do not include the Accept-Encoding header
|
||||
headers = {'Youtubedl-no-compression': 'True'}
|
||||
if 'user_agent' in info_dict:
|
||||
headers['Youtubedl-user-agent'] = info_dict['user_agent']
|
||||
basic_request = compat_urllib_request.Request(url, None, headers)
|
||||
request = compat_urllib_request.Request(url, None, headers)
|
||||
|
||||
if self.params.get('test', False):
|
||||
request.add_header('Range','bytes=0-10240')
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', False):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range','bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
else:
|
||||
resume_len = 0
|
||||
|
||||
count = 0
|
||||
retries = self.params.get('retries', 0)
|
||||
while count <= retries:
|
||||
# Establish connection
|
||||
try:
|
||||
if count == 0 and 'urlhandle' in info_dict:
|
||||
data = info_dict['urlhandle']
|
||||
data = compat_urllib_request.urlopen(request)
|
||||
break
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if (err.code < 500 or err.code >= 600) and err.code != 416:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
elif err.code == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
data = compat_urllib_request.urlopen(basic_request)
|
||||
content_length = data.info()['Content-Length']
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
if (content_length is not None and
|
||||
(resume_len - 100 < int(content_length) < resume_len + 100)):
|
||||
# The file had already been fully downloaded.
|
||||
# Explanation to the above condition: in issue #175 it was revealed that
|
||||
# YouTube sometimes adds or removes a few bytes from the end of the file,
|
||||
# changing the file size slightly and causing problems for some users. So
|
||||
# I decided to implement a suggested change and consider the file
|
||||
# completely downloaded if the file size differs less than 100 bytes from
|
||||
# the one in the hard drive.
|
||||
self.report_file_already_downloaded(filename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
open_mode = 'wb'
|
||||
break
|
||||
# Retry
|
||||
count += 1
|
||||
if count <= retries:
|
||||
self.report_retry(count, retries)
|
||||
|
||||
if count > retries:
|
||||
self.report_error(u'giving up after %s retries' % retries)
|
||||
return False
|
||||
|
||||
data_len = data.info().get('Content-length', None)
|
||||
if data_len is not None:
|
||||
data_len = int(data_len) + resume_len
|
||||
min_data_len = self.params.get("min_filesize", None)
|
||||
max_data_len = self.params.get("max_filesize", None)
|
||||
if min_data_len is not None and data_len < min_data_len:
|
||||
self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
|
||||
return False
|
||||
if max_data_len is not None and data_len > max_data_len:
|
||||
self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
|
||||
return False
|
||||
|
||||
data_len_str = format_bytes(data_len)
|
||||
byte_counter = 0 + resume_len
|
||||
block_size = self.params.get('buffersize', 1024)
|
||||
start = time.time()
|
||||
while True:
|
||||
# Download and write
|
||||
before = time.time()
|
||||
data_block = data.read(block_size)
|
||||
after = time.time()
|
||||
if len(data_block) == 0:
|
||||
break
|
||||
byte_counter += len(data_block)
|
||||
|
||||
# Open file just in time
|
||||
if stream is None:
|
||||
try:
|
||||
(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
|
||||
assert stream is not None
|
||||
filename = self.undo_temp_name(tmpfilename)
|
||||
self.report_destination(filename)
|
||||
except (OSError, IOError) as err:
|
||||
self.report_error(u'unable to open for writing: %s' % str(err))
|
||||
return False
|
||||
try:
|
||||
stream.write(data_block)
|
||||
except (IOError, OSError):
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'unable to write data: %s' % str(err))
|
||||
return False
|
||||
if not self.params.get('noresizebuffer', False):
|
||||
block_size = self.best_block_size(after - before, len(data_block))
|
||||
|
||||
# Progress message
|
||||
speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
|
||||
if data_len is None:
|
||||
eta = percent = None
|
||||
else:
|
||||
percent = self.calc_percent(byte_counter, data_len)
|
||||
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
|
||||
# Apply rate limit
|
||||
self.slow_down(start, byte_counter - resume_len)
|
||||
|
||||
if stream is None:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'Did not get any data blocks')
|
||||
return False
|
||||
stream.close()
|
||||
self.report_finish(data_len_str, (time.time() - start))
|
||||
if data_len is not None and byte_counter != data_len:
|
||||
raise ContentTooShortError(byte_counter, int(data_len))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
'total_bytes': byte_counter,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
|
||||
return True
|
39
youtube_dl/downloader/mplayer.py
Normal file
39
youtube_dl/downloader/mplayer.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class MplayerFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
try:
|
||||
subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'mplayer exited with code %d' % retval)
|
||||
return False
|
178
youtube_dl/downloader/rtmp.py
Normal file
178
youtube_dl/downloader/rtmp.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
)
|
||||
|
||||
|
||||
class RtmpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = u''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
|
||||
speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
data_len_str = u'~' + format_bytes(data_len)
|
||||
self.report_progress(percent, data_len_str, speed, eta)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'speed': speed,
|
||||
})
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1))*1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
|
||||
cursor_in_new_line = False
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'speed': speed,
|
||||
})
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen(u'[rtmpdump] '+line)
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen(u'')
|
||||
return proc.returncode
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url', None)
|
||||
page_url = info_dict.get('page_url', None)
|
||||
play_path = info_dict.get('play_path', None)
|
||||
tc_url = info_dict.get('tc_url', None)
|
||||
live = info_dict.get('rtmp_live', False)
|
||||
conn = info_dict.get('rtmp_conn', None)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
test = self.params.get('test', False)
|
||||
|
||||
# Check for rtmpdump first
|
||||
try:
|
||||
subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
|
||||
except (OSError, IOError):
|
||||
self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
|
||||
return False
|
||||
|
||||
# Download using rtmpdump. rtmpdump returns exit code 2 when
|
||||
# the connection was interrumpted and resuming appears to be
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
basic_args += ['--pageUrl', page_url]
|
||||
if play_path is not None:
|
||||
basic_args += ['--playpath', play_path]
|
||||
if tc_url is not None:
|
||||
basic_args += ['--tcUrl', url]
|
||||
if test:
|
||||
basic_args += ['--stop', '1']
|
||||
if live:
|
||||
basic_args += ['--live']
|
||||
if conn:
|
||||
basic_args += ['--conn', conn]
|
||||
args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
|
||||
if self.params.get('verbose', False):
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
except ImportError:
|
||||
shell_quote = repr
|
||||
self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
|
||||
while (retval == 2 or retval == 1) and not test:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == 1:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
if prevsize == cursize and retval == 2 and cursize > 1024:
|
||||
self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
|
||||
retval = 0
|
||||
break
|
||||
if retval == 0 or (test and retval == 2):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen(u'[rtmpdump] %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
})
|
||||
return True
|
||||
else:
|
||||
self.to_stderr(u"\n")
|
||||
self.report_error(u'rtmpdump exited with code %d' % retval)
|
||||
return False
|
@@ -1,6 +1,7 @@
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .addanime import AddAnimeIE
|
||||
from .anitube import AnitubeIE
|
||||
from .aparat import AparatIE
|
||||
from .appletrailers import AppleTrailersIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .ard import ARDIE
|
||||
@@ -32,6 +33,7 @@ from .collegehumor import CollegeHumorIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .condenast import CondeNastIE
|
||||
from .criterion import CriterionIE
|
||||
from .crunchyroll import CrunchyrollIE
|
||||
from .cspan import CSpanIE
|
||||
from .d8 import D8IE
|
||||
from .dailymotion import (
|
||||
@@ -82,6 +84,10 @@ from .ina import InaIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE
|
||||
from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jukebox import JukeboxIE
|
||||
from .justintv import JustinTVIE
|
||||
|
@@ -1,11 +1,6 @@
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class AcademicEarthCourseIE(InfoExtractor):
|
||||
|
56
youtube_dl/extractor/aparat.py
Normal file
56
youtube_dl/extractor/aparat.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#coding: utf-8
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
)
|
||||
|
||||
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.aparat.com/v/wP8On',
|
||||
u'file': u'wP8On.mp4',
|
||||
u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1',
|
||||
u'info_dict': {
|
||||
u"title": u"تیم گلکسی 11 - زومیت",
|
||||
},
|
||||
#u'skip': u'Extremely unreliable',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' +
|
||||
video_id + u'/vt/frame')
|
||||
webpage = self._download_webpage(embed_url, video_id)
|
||||
|
||||
video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage)
|
||||
for i, video_url in enumerate(video_urls):
|
||||
req = HEADRequest(video_url)
|
||||
res = self._request_webpage(
|
||||
req, video_id, note=u'Testing video URL %d' % i, errnote=False)
|
||||
if res:
|
||||
break
|
||||
else:
|
||||
raise ExtractorError(u'No working video URLs found')
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title')
|
||||
thumbnail = self._search_regex(
|
||||
r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@@ -9,7 +9,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BlinkxIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/ce/|blinkx:)(?P<id>[^?]+)'
|
||||
_VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
||||
_IE_NAME = u'blinkx'
|
||||
|
||||
_TEST = {
|
||||
@@ -54,6 +54,10 @@ class BlinkxIE(InfoExtractor):
|
||||
})
|
||||
elif m['type'] == 'original':
|
||||
duration = m['d']
|
||||
elif m['type'] == 'youtube':
|
||||
yt_id = m['link']
|
||||
self.to_screen(u'Youtube video detected: %s' % yt_id)
|
||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
||||
elif m['type'] in ('flv', 'mp4'):
|
||||
vcodec = remove_start(m['vcodec'], 'ff')
|
||||
acodec = remove_start(m['acodec'], 'ff')
|
||||
|
@@ -70,13 +70,14 @@ class BlipTVIE(InfoExtractor):
|
||||
info = None
|
||||
urlh = self._request_webpage(request, None, False,
|
||||
u'unable to download video info webpage')
|
||||
|
||||
if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
|
||||
basename = url.split('/')[-1]
|
||||
title,ext = os.path.splitext(basename)
|
||||
title = title.decode('UTF-8')
|
||||
ext = ext.replace('.', '')
|
||||
self.report_direct_download(title)
|
||||
info = {
|
||||
return {
|
||||
'id': title,
|
||||
'url': url,
|
||||
'uploader': None,
|
||||
@@ -85,49 +86,47 @@ class BlipTVIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'urlhandle': urlh
|
||||
}
|
||||
if info is None: # Regular URL
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
json_code = json_code_bytes.decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
|
||||
|
||||
try:
|
||||
json_data = json.loads(json_code)
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
try:
|
||||
json_code_bytes = urlh.read()
|
||||
json_code = json_code_bytes.decode('utf-8')
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err))
|
||||
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
if 'additionalMedia' in data:
|
||||
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
|
||||
best_format = formats[-1]
|
||||
video_url = best_format['url']
|
||||
else:
|
||||
video_url = data['media']['url']
|
||||
umobj = re.match(self._URL_EXT, video_url)
|
||||
if umobj is None:
|
||||
raise ValueError('Can not determine filename extension')
|
||||
ext = umobj.group(1)
|
||||
try:
|
||||
json_data = json.loads(json_code)
|
||||
if 'Post' in json_data:
|
||||
data = json_data['Post']
|
||||
else:
|
||||
data = json_data
|
||||
|
||||
info = {
|
||||
'id': compat_str(data['item_id']),
|
||||
'url': video_url,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'ext': ext,
|
||||
'format': data['media']['mimeType'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'player_url': data['embedUrl'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
}
|
||||
except (ValueError,KeyError) as err:
|
||||
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
|
||||
upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
|
||||
if 'additionalMedia' in data:
|
||||
formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height']))
|
||||
best_format = formats[-1]
|
||||
video_url = best_format['url']
|
||||
else:
|
||||
video_url = data['media']['url']
|
||||
umobj = re.match(self._URL_EXT, video_url)
|
||||
if umobj is None:
|
||||
raise ValueError('Can not determine filename extension')
|
||||
ext = umobj.group(1)
|
||||
|
||||
return [info]
|
||||
return {
|
||||
'id': compat_str(data['item_id']),
|
||||
'url': video_url,
|
||||
'uploader': data['display_name'],
|
||||
'upload_date': upload_date,
|
||||
'title': data['title'],
|
||||
'ext': ext,
|
||||
'format': data['media']['mimeType'],
|
||||
'thumbnail': data['thumbnailUrl'],
|
||||
'description': data['description'],
|
||||
'player_url': data['embedUrl'],
|
||||
'user_agent': 'iTunes/10.6.1',
|
||||
}
|
||||
except (ValueError, KeyError) as err:
|
||||
raise ExtractorError(u'Unable to parse video information: %s' % repr(err))
|
||||
|
||||
|
||||
class BlipTVUserIE(InfoExtractor):
|
||||
|
@@ -26,7 +26,7 @@ class BrightcoveIE(InfoExtractor):
|
||||
# From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
|
||||
u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
|
||||
u'file': u'2371591881001.mp4',
|
||||
u'md5': u'8eccab865181d29ec2958f32a6a754f5',
|
||||
u'md5': u'5423e113865d26e40624dce2e4b45d95',
|
||||
u'note': u'Test Brightcove downloads and detection in GenericIE',
|
||||
u'info_dict': {
|
||||
u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
|
||||
|
@@ -170,6 +170,8 @@ class InfoExtractor(object):
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if errnote is False:
|
||||
return False
|
||||
if errnote is None:
|
||||
errnote = u'Unable to download webpage'
|
||||
errmsg = u'%s: %s' % (errnote, compat_str(err))
|
||||
@@ -263,7 +265,8 @@ class InfoExtractor(object):
|
||||
self.to_screen(u'Logging in')
|
||||
|
||||
#Methods for following #608
|
||||
def url_result(self, url, ie=None, video_id=None):
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
#TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
@@ -272,7 +275,8 @@ class InfoExtractor(object):
|
||||
if video_id is not None:
|
||||
video_info['id'] = video_id
|
||||
return video_info
|
||||
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
|
||||
@staticmethod
|
||||
def playlist_result(entries, playlist_id=None, playlist_title=None):
|
||||
"""Returns a playlist"""
|
||||
video_info = {'_type': 'playlist',
|
||||
'entries': entries}
|
||||
|
171
youtube_dl/extractor/crunchyroll.py
Normal file
171
youtube_dl/extractor/crunchyroll.py
Normal file
@@ -0,0 +1,171 @@
|
||||
# encoding: utf-8
|
||||
import re, base64, zlib
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
inc,
|
||||
)
|
||||
|
||||
class CrunchyrollIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?(?P<url>crunchyroll\.com/[^/]*/[^/?&]*?(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
u'file': u'645513.flv',
|
||||
#u'md5': u'b1639fd6ddfaa43788c85f6d1dddd412',
|
||||
u'info_dict': {
|
||||
u'title': u'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
u'description': u'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
u'thumbnail': u'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
u'uploader': u'Yomiuri Telecasting Corporation (YTV)',
|
||||
u'upload_date': u'20131013',
|
||||
},
|
||||
u'params': {
|
||||
# rtmp
|
||||
u'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
u'360': (u'60', u'106'),
|
||||
u'480': (u'61', u'106'),
|
||||
u'720': (u'62', u'106'),
|
||||
u'1080': (u'80', u'108'),
|
||||
}
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(data)
|
||||
iv = bytes_to_intlist(iv)
|
||||
id = int(id)
|
||||
|
||||
def obfuscate_key_aux(count, modulo, start):
|
||||
output = list(start)
|
||||
for _ in range(count):
|
||||
output.append(output[-1] + output[-2])
|
||||
# cut off start values
|
||||
output = output[2:]
|
||||
output = list(map(lambda x: x % modulo + 33, output))
|
||||
return output
|
||||
|
||||
def obfuscate_key(key):
|
||||
num1 = int(floor(pow(2, 25) * sqrt(6.9)))
|
||||
num2 = (num1 ^ key) << 5
|
||||
num3 = key ^ num1
|
||||
num4 = num3 ^ (num3 >> 3) ^ num2
|
||||
prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
|
||||
shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode(u'ascii')).digest())
|
||||
# Extend 160 Bit hash to 256 Bit
|
||||
return shaHash + [0] * 12
|
||||
|
||||
key = obfuscate_key(id)
|
||||
class Counter:
|
||||
__value = iv
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
return temp
|
||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||
return zlib.decompress(decrypted_data)
|
||||
|
||||
def _convert_subtitles_to_srt(self, subtitles):
|
||||
i=1
|
||||
output = u''
|
||||
for start, end, text in re.findall(r'<event [^>]*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles):
|
||||
start = start.replace(u'.', u',')
|
||||
end = end.replace(u'.', u',')
|
||||
text = clean_html(text)
|
||||
text = text.replace(u'\\N', u'\n')
|
||||
if not text:
|
||||
continue
|
||||
output += u'%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
|
||||
i+=1
|
||||
return output
|
||||
|
||||
def _real_extract(self,url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
webpage_url = u'http://www.' + mobj.group('url')
|
||||
video_id = mobj.group(u'video_id')
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
note_m = self._html_search_regex(r'<div class="showmedia-trailer-notice">(.+?)</div>', webpage, u'trailer-notice', default=u'')
|
||||
if note_m:
|
||||
raise ExtractorError(note_m)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1[^>]*>(.+?)</h1>', webpage, u'video_title', flags=re.DOTALL)
|
||||
video_title = re.sub(r' {2,}', u' ', video_title)
|
||||
video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, u'video_description', default=u'')
|
||||
if not video_description:
|
||||
video_description = None
|
||||
video_upload_date = self._html_search_regex(r'<div>Availability for free users:(.+?)</div>', webpage, u'video_upload_date', fatal=False, flags=re.DOTALL)
|
||||
if video_upload_date:
|
||||
video_upload_date = unified_strdate(video_upload_date)
|
||||
video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, u'video_uploader', fatal=False, flags=re.DOTALL)
|
||||
|
||||
playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, u'playerdata_url'))
|
||||
playerdata_req = compat_urllib_request.Request(playerdata_url)
|
||||
playerdata_req.data = compat_urllib_parse.urlencode({u'current_page': webpage_url})
|
||||
playerdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
|
||||
playerdata = self._download_webpage(playerdata_req, video_id, note=u'Downloading media info')
|
||||
|
||||
stream_id = self._search_regex(r'<media_id>([^<]+)', playerdata, u'stream_id')
|
||||
video_thumbnail = self._search_regex(r'<episode_image_url>([^<]+)', playerdata, u'thumbnail', fatal=False)
|
||||
|
||||
formats = []
|
||||
for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage):
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt+u'p'
|
||||
streamdata_req = compat_urllib_request.Request(u'http://www.crunchyroll.com/xml/')
|
||||
# urlencode doesn't work!
|
||||
streamdata_req.data = u'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality='+stream_quality+u'&media%5Fid='+stream_id+u'&video%5Fformat='+stream_format
|
||||
streamdata_req.add_header(u'Content-Type', u'application/x-www-form-urlencoded')
|
||||
streamdata_req.add_header(u'Content-Length', str(len(streamdata_req.data)))
|
||||
streamdata = self._download_webpage(streamdata_req, video_id, note=u'Downloading media info for '+video_format)
|
||||
video_url = self._search_regex(r'<host>([^<]+)', streamdata, u'video_url')
|
||||
video_play_path = self._search_regex(r'<file>([^<]+)', streamdata, u'video_play_path')
|
||||
formats.append({
|
||||
u'url': video_url,
|
||||
u'play_path': video_play_path,
|
||||
u'ext': 'flv',
|
||||
u'format': video_format,
|
||||
u'format_id': video_format,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
|
||||
sub_page = self._download_webpage(u'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id='+sub_id,\
|
||||
video_id, note=u'Downloading subtitles for '+sub_name)
|
||||
id = self._search_regex(r'id=\'([0-9]+)', sub_page, u'subtitle_id', fatal=False)
|
||||
iv = self._search_regex(r'<iv>([^<]+)', sub_page, u'subtitle_iv', fatal=False)
|
||||
data = self._search_regex(r'<data>([^<]+)', sub_page, u'subtitle_data', fatal=False)
|
||||
if not id or not iv or not data:
|
||||
continue
|
||||
id = int(id)
|
||||
iv = base64.b64decode(iv)
|
||||
data = base64.b64decode(data)
|
||||
|
||||
subtitle = self._decrypt_subtitles(data, iv, id).decode(u'utf-8')
|
||||
lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, u'subtitle_lang_code', fatal=False)
|
||||
if not lang_code:
|
||||
continue
|
||||
subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)
|
||||
|
||||
return {
|
||||
u'id': video_id,
|
||||
u'title': video_title,
|
||||
u'description': video_description,
|
||||
u'thumbnail': video_thumbnail,
|
||||
u'uploader': video_uploader,
|
||||
u'upload_date': video_upload_date,
|
||||
u'subtitles': subtitles,
|
||||
u'formats': formats,
|
||||
}
|
@@ -11,10 +11,14 @@ from ..utils import (
|
||||
compat_urlparse,
|
||||
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_basename,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -71,6 +75,27 @@ class GenericIE(InfoExtractor):
|
||||
u'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Direct link to a video
|
||||
{
|
||||
u'url': u'http://media.w3.org/2010/05/sintel/trailer.mp4',
|
||||
u'file': u'trailer.mp4',
|
||||
u'md5': u'67d406c2bcb6af27fa886f31aa934bbe',
|
||||
u'info_dict': {
|
||||
u'id': u'trailer',
|
||||
u'title': u'trailer',
|
||||
u'upload_date': u'20100513',
|
||||
}
|
||||
},
|
||||
# ooyala video
|
||||
{
|
||||
u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
|
||||
u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c',
|
||||
u'info_dict': {
|
||||
u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
|
||||
u'ext': u'mp4',
|
||||
u'title': u'2cc213299525360.mov', #that's what we get
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_download_webpage(self, video_id):
|
||||
@@ -83,23 +108,20 @@ class GenericIE(InfoExtractor):
|
||||
"""Report information extraction."""
|
||||
self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
|
||||
|
||||
def _test_redirect(self, url):
|
||||
def _send_head(self, url):
|
||||
"""Check if it is a redirect, like url shorteners, in case return the new url."""
|
||||
class HeadRequest(compat_urllib_request.Request):
|
||||
def get_method(self):
|
||||
return "HEAD"
|
||||
|
||||
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
||||
"""
|
||||
Subclass the HTTPRedirectHandler to make it use our
|
||||
HeadRequest also on the redirected URL
|
||||
HEADRequest also on the redirected URL
|
||||
"""
|
||||
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
||||
if code in (301, 302, 303, 307):
|
||||
newurl = newurl.replace(' ', '%20')
|
||||
newheaders = dict((k,v) for k,v in req.headers.items()
|
||||
if k.lower() not in ("content-length", "content-type"))
|
||||
return HeadRequest(newurl,
|
||||
return HEADRequest(newurl,
|
||||
headers=newheaders,
|
||||
origin_req_host=req.get_origin_req_host(),
|
||||
unverifiable=True)
|
||||
@@ -128,32 +150,49 @@ class GenericIE(InfoExtractor):
|
||||
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
|
||||
opener.add_handler(handler())
|
||||
|
||||
response = opener.open(HeadRequest(url))
|
||||
response = opener.open(HEADRequest(url))
|
||||
if response is None:
|
||||
raise ExtractorError(u'Invalid URL protocol')
|
||||
new_url = response.geturl()
|
||||
|
||||
if url == new_url:
|
||||
return False
|
||||
|
||||
self.report_following_redirect(new_url)
|
||||
return new_url
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
if not parsed_url.scheme:
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
video_id = os.path.splitext(url.split('/')[-1])[0]
|
||||
|
||||
try:
|
||||
new_url = self._test_redirect(url)
|
||||
if new_url:
|
||||
return [self.url_result(new_url)]
|
||||
response = self._send_head(url)
|
||||
|
||||
# Check for redirect
|
||||
new_url = response.geturl()
|
||||
if url != new_url:
|
||||
self.report_following_redirect(new_url)
|
||||
return self.url_result(new_url)
|
||||
|
||||
# Check for direct link to a video
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
m = re.match(r'^(?P<type>audio|video|application(?=/ogg$))/(?P<format_id>.+)$', content_type)
|
||||
if m:
|
||||
upload_date = response.headers.get('Last-Modified')
|
||||
if upload_date:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': os.path.splitext(url_basename(url))[0],
|
||||
'formats': [{
|
||||
'format_id': m.group('format_id'),
|
||||
'url': url,
|
||||
'vcodec': u'none' if m.group('type') == 'audio' else None
|
||||
}],
|
||||
'upload_date': upload_date,
|
||||
}
|
||||
|
||||
except compat_urllib_error.HTTPError:
|
||||
# This may be a stupid server that doesn't like HEAD, our UA, or so
|
||||
pass
|
||||
|
||||
video_id = url.split('/')[-1]
|
||||
try:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
except ValueError:
|
||||
@@ -183,7 +222,7 @@ class GenericIE(InfoExtractor):
|
||||
self.to_screen(u'Brightcove video detected.')
|
||||
return self.url_result(bc_url, 'Brightcove')
|
||||
|
||||
# Look for embedded Vimeo player
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src="(https?://player.vimeo.com/video/.+?)"', webpage)
|
||||
if mobj:
|
||||
@@ -191,9 +230,18 @@ class GenericIE(InfoExtractor):
|
||||
surl = smuggle_url(player_url, {'Referer': url})
|
||||
return self.url_result(surl, 'Vimeo')
|
||||
|
||||
# Look for embedded (swf embed) Vimeo player
|
||||
mobj = re.search(
|
||||
r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'Vimeo')
|
||||
|
||||
# Look for embedded YouTube player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
|
||||
matches = re.findall(r'''(?x)
|
||||
(?:<iframe[^>]+?src=|embedSWF\(\s*)
|
||||
(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
|
||||
(?:embed|v)/.+?)
|
||||
\1''', webpage)
|
||||
if matches:
|
||||
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
|
||||
for tuppl in matches]
|
||||
@@ -247,6 +295,16 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group(1))
|
||||
|
||||
# Look for Aparat videos
|
||||
mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Aparat')
|
||||
|
||||
# Start with something easy: JW Player in SWFObject
|
||||
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
|
||||
if mobj is None:
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = u'imdb'
|
||||
IE_DESC = u'Internet Movie Database trailers'
|
||||
_VALID_URL = r'http://www\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||
_VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
u'url': u'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
@@ -27,7 +27,7 @@ class ImdbIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url,video_id)
|
||||
webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id)
|
||||
descr = get_element_by_attribute('itemprop', 'description', webpage)
|
||||
available_formats = re.findall(
|
||||
r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage,
|
||||
|
154
youtube_dl/extractor/ivi.py
Normal file
154
youtube_dl/extractor/ivi.py
Normal file
@@ -0,0 +1,154 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = u'ivi.ru'
|
||||
IE_NAME = u'ivi'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
# Single movie
|
||||
{
|
||||
u'url': u'http://www.ivi.ru/watch/53141',
|
||||
u'file': u'53141.mp4',
|
||||
u'md5': u'6ff5be2254e796ed346251d117196cf4',
|
||||
u'info_dict': {
|
||||
u'title': u'Иван Васильевич меняет профессию',
|
||||
u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
|
||||
u'duration': 5498,
|
||||
u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
|
||||
},
|
||||
u'skip': u'Only works from Russia',
|
||||
},
|
||||
# Serial's serie
|
||||
{
|
||||
u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
|
||||
u'file': u'74791.mp4',
|
||||
u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
|
||||
u'info_dict': {
|
||||
u'title': u'Дежурный ангел - 1 серия',
|
||||
u'duration': 2490,
|
||||
u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
|
||||
},
|
||||
u'skip': u'Only works from Russia',
|
||||
}
|
||||
]
|
||||
|
||||
# Sorted by quality
|
||||
_known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']
|
||||
|
||||
# Sorted by size
|
||||
_known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']
|
||||
|
||||
def _extract_description(self, html):
|
||||
m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
|
||||
return m.group('description') if m is not None else None
|
||||
|
||||
def _extract_comment_count(self, html):
|
||||
m = re.search(u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">\s*Комментарии:\s*(?P<commentcount>\d+)\s*</a>', html)
|
||||
return int(m.group('commentcount')) if m is not None else 0
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
api_url = 'http://api.digitalaccess.ru/api/json/'
|
||||
|
||||
data = {u'method': u'da.content.get',
|
||||
u'params': [video_id, {u'site': u's183',
|
||||
u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
|
||||
u'contentid': video_id
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(api_url, json.dumps(data))
|
||||
|
||||
video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
|
||||
video_json = json.loads(video_json_page)
|
||||
|
||||
if u'error' in video_json:
|
||||
error = video_json[u'error']
|
||||
if error[u'origin'] == u'NoRedisValidData':
|
||||
raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
|
||||
raise ExtractorError(u'Unable to download video %s: %s' % (video_id, error[u'message']), expected=True)
|
||||
|
||||
result = video_json[u'result']
|
||||
|
||||
formats = [{'url': x[u'url'],
|
||||
'format_id': x[u'content_format']
|
||||
} for x in result[u'files'] if x[u'content_format'] in self._known_formats]
|
||||
formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
|
||||
|
||||
if len(formats) == 0:
|
||||
self._downloader.report_warning(u'No media links available for %s' % video_id)
|
||||
return
|
||||
|
||||
duration = result[u'duration']
|
||||
compilation = result[u'compilation']
|
||||
title = result[u'title']
|
||||
|
||||
title = '%s - %s' % (compilation, title) if compilation is not None else title
|
||||
|
||||
previews = result[u'preview']
|
||||
previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt['content_format']))
|
||||
thumbnail = previews[-1][u'url'] if len(previews) > 0 else None
|
||||
|
||||
video_page = self._download_webpage(url, video_id, u'Downloading video page')
|
||||
description = self._extract_description(video_page)
|
||||
comment_count = self._extract_comment_count(video_page)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'comment_count': comment_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class IviCompilationIE(InfoExtractor):
|
||||
IE_DESC = u'ivi.ru compilations'
|
||||
IE_NAME = u'ivi:compilation'
|
||||
_VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
|
||||
|
||||
def _extract_entries(self, html, compilation_id):
|
||||
return [self.url_result('http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), 'Ivi')
|
||||
for serie in re.findall(r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id, html)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
compilation_id = mobj.group('compilationid')
|
||||
season_id = mobj.group('seasonid')
|
||||
|
||||
if season_id is not None: # Season link
|
||||
season_page = self._download_webpage(url, compilation_id, u'Downloading season %s web page' % season_id)
|
||||
playlist_id = '%s/season%s' % (compilation_id, season_id)
|
||||
playlist_title = self._html_search_meta(u'title', season_page, u'title')
|
||||
entries = self._extract_entries(season_page, compilation_id)
|
||||
else: # Compilation link
|
||||
compilation_page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
|
||||
playlist_id = compilation_id
|
||||
playlist_title = self._html_search_meta(u'title', compilation_page, u'title')
|
||||
seasons = re.findall(r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, compilation_page)
|
||||
if len(seasons) == 0: # No seasons in this compilation
|
||||
entries = self._extract_entries(compilation_page, compilation_id)
|
||||
else:
|
||||
entries = []
|
||||
for season_id in seasons:
|
||||
season_page = self._download_webpage('http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
|
||||
compilation_id, u'Downloading season %s web page' % season_id)
|
||||
entries.extend(self._extract_entries(season_page, compilation_id))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
@@ -8,23 +8,8 @@ from ..utils import (
|
||||
|
||||
class MDRIE(InfoExtractor):
|
||||
_VALID_URL = r'^(?P<domain>(?:https?://)?(?:www\.)?mdr\.de)/mediathek/(?:.*)/(?P<type>video|audio)(?P<video_id>[^/_]+)_.*'
|
||||
|
||||
_TESTS = [{
|
||||
u'url': u'http://www.mdr.de/mediathek/themen/nachrichten/video165624_zc-c5c7de76_zs-3795826d.html',
|
||||
u'file': u'165624.mp4',
|
||||
u'md5': u'ae785f36ecbf2f19b42edf1bc9c85815',
|
||||
u'info_dict': {
|
||||
u"title": u"MDR aktuell Eins30 09.12.2013, 22:48 Uhr"
|
||||
},
|
||||
},
|
||||
{
|
||||
u'url': u'http://www.mdr.de/mediathek/radio/mdr1-radio-sachsen/audio718370_zc-67b21197_zs-1b9b2483.html',
|
||||
u'file': u'718370.mp3',
|
||||
u'md5': u'a9d21345a234c7b45dee612f290fd8d7',
|
||||
u'info_dict': {
|
||||
u"title": u"MDR 1 RADIO SACHSEN 10.12.2013, 05:00 Uhr"
|
||||
},
|
||||
}]
|
||||
|
||||
# No tests, MDR regularily deletes its videos
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
|
@@ -22,6 +22,11 @@ class OoyalaIE(InfoExtractor):
|
||||
def _url_for_embed_code(embed_code):
|
||||
return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
|
||||
|
||||
@classmethod
|
||||
def _build_url_result(cls, embed_code):
|
||||
return cls.url_result(cls._url_for_embed_code(embed_code),
|
||||
ie=cls.ie_key())
|
||||
|
||||
def _extract_result(self, info, more_info):
|
||||
return {'id': info['embedCode'],
|
||||
'ext': 'mp4',
|
||||
|
@@ -1,12 +1,7 @@
|
||||
# coding: utf-8
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class RadioFranceIE(InfoExtractor):
|
||||
@@ -42,11 +37,11 @@ class RadioFranceIE(InfoExtractor):
|
||||
webpage, u'audio URLs')
|
||||
formats = [
|
||||
{
|
||||
'format_id': m[0],
|
||||
'url': m[1],
|
||||
'format_id': fm[0],
|
||||
'url': fm[1],
|
||||
'vcodec': 'none',
|
||||
}
|
||||
for m in
|
||||
for fm in
|
||||
re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)
|
||||
]
|
||||
# No sorting, we don't know any more about these formats
|
||||
|
@@ -1,5 +1,6 @@
|
||||
# encoding: utf-8
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import json
|
||||
import hashlib
|
||||
@@ -10,6 +11,7 @@ from ..utils import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
@@ -132,7 +134,16 @@ class SmotriIE(InfoExtractor):
|
||||
# We will extract some from the video web page instead
|
||||
video_page_url = 'http://' + mobj.group('url')
|
||||
video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')
|
||||
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div class="videoUnModer">(.*?)</div>', video_page,
|
||||
u'warning messagef', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
u'Video %s may not be available; smotri said: %s ' %
|
||||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if re.search(u'EroConfirmText">', video_page) is not None:
|
||||
self.report_age_confirmation()
|
||||
@@ -148,38 +159,44 @@ class SmotriIE(InfoExtractor):
|
||||
# Extract the rest of meta data
|
||||
video_title = self._search_meta(u'name', video_page, u'title')
|
||||
if not video_title:
|
||||
video_title = video_url.rsplit('/', 1)[-1]
|
||||
video_title = os.path.splitext(url_basename(video_url))[0]
|
||||
|
||||
video_description = self._search_meta(u'description', video_page)
|
||||
END_TEXT = u' на сайте Smotri.com'
|
||||
if video_description.endswith(END_TEXT):
|
||||
if video_description and video_description.endswith(END_TEXT):
|
||||
video_description = video_description[:-len(END_TEXT)]
|
||||
START_TEXT = u'Смотреть онлайн ролик '
|
||||
if video_description.startswith(START_TEXT):
|
||||
if video_description and video_description.startswith(START_TEXT):
|
||||
video_description = video_description[len(START_TEXT):]
|
||||
video_thumbnail = self._search_meta(u'thumbnail', video_page)
|
||||
|
||||
upload_date_str = self._search_meta(u'uploadDate', video_page, u'upload date')
|
||||
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
|
||||
video_upload_date = (
|
||||
(
|
||||
upload_date_m.group('year') +
|
||||
upload_date_m.group('month') +
|
||||
upload_date_m.group('day')
|
||||
if upload_date_str:
|
||||
upload_date_m = re.search(r'(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})T', upload_date_str)
|
||||
video_upload_date = (
|
||||
(
|
||||
upload_date_m.group('year') +
|
||||
upload_date_m.group('month') +
|
||||
upload_date_m.group('day')
|
||||
)
|
||||
if upload_date_m else None
|
||||
)
|
||||
if upload_date_m else None
|
||||
)
|
||||
else:
|
||||
video_upload_date = None
|
||||
|
||||
duration_str = self._search_meta(u'duration', video_page)
|
||||
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
|
||||
video_duration = (
|
||||
(
|
||||
(int(duration_m.group('hours')) * 60 * 60) +
|
||||
(int(duration_m.group('minutes')) * 60) +
|
||||
int(duration_m.group('seconds'))
|
||||
if duration_str:
|
||||
duration_m = re.search(r'T(?P<hours>[0-9]{2})H(?P<minutes>[0-9]{2})M(?P<seconds>[0-9]{2})S', duration_str)
|
||||
video_duration = (
|
||||
(
|
||||
(int(duration_m.group('hours')) * 60 * 60) +
|
||||
(int(duration_m.group('minutes')) * 60) +
|
||||
int(duration_m.group('seconds'))
|
||||
)
|
||||
if duration_m else None
|
||||
)
|
||||
if duration_m else None
|
||||
)
|
||||
else:
|
||||
video_duration = None
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
u'<div class="DescrUser"><div>Автор.*?onmouseover="popup_user_info[^"]+">(.*?)</a>',
|
||||
@@ -202,7 +219,7 @@ class SmotriIE(InfoExtractor):
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'uploader_id': video_uploader_id,
|
||||
'video_duration': video_duration,
|
||||
'duration': video_duration,
|
||||
'view_count': video_view_count,
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
'video_page_url': video_page_url
|
||||
|
@@ -24,7 +24,7 @@ class SoundcloudIE(InfoExtractor):
|
||||
"""
|
||||
|
||||
_VALID_URL = r'''^(?:https?://)?
|
||||
(?:(?:(?:www\.)?soundcloud\.com/
|
||||
(?:(?:(?:www\.|m\.)?soundcloud\.com/
|
||||
(?P<uploader>[\w\d-]+)/
|
||||
(?!sets/)(?P<title>[\w\d-]+)/?
|
||||
(?P<token>[^?]+?)?(?:[?].*)?$)
|
||||
|
@@ -15,7 +15,7 @@ class Vbox7IE(InfoExtractor):
|
||||
_TEST = {
|
||||
u'url': u'http://vbox7.com/play:249bb972c2',
|
||||
u'file': u'249bb972c2.flv',
|
||||
u'md5': u'9c70d6d956f888bdc08c124acc120cfe',
|
||||
u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
|
||||
u'info_dict': {
|
||||
u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
|
||||
}
|
||||
|
@@ -16,11 +16,20 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class VimeoIE(InfoExtractor):
|
||||
"""Information extractor for vimeo.com."""
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:.*?/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?P<proto>https?://)?
|
||||
(?:(?:www|(?P<player>player))\.)?
|
||||
vimeo(?P<pro>pro)?\.com/
|
||||
(?:.*?/)?
|
||||
(?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
|
||||
(?:videos?/)?
|
||||
(?P<id>[0-9]+)
|
||||
/?(?:[?&].*)?(?:[#].*)?$'''
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
IE_NAME = u'vimeo'
|
||||
_TESTS = [
|
||||
|
@@ -162,23 +162,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
# Dash audio
|
||||
'141', '172', '140', '171', '139',
|
||||
]
|
||||
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
|
||||
# Apple HTTP Live Streaming
|
||||
'96', '95', '94', '93', '92', '132', '151',
|
||||
# 3D
|
||||
'85', '102', '84', '101', '83', '100', '82',
|
||||
# Dash video
|
||||
'138', '248', '137', '247', '136', '246', '245',
|
||||
'244', '135', '243', '134', '242', '133', '160',
|
||||
# Dash audio
|
||||
'172', '141', '171', '140', '139',
|
||||
]
|
||||
_video_formats_map = {
|
||||
'flv': ['35', '34', '6', '5'],
|
||||
'3gp': ['36', '17', '13'],
|
||||
'mp4': ['38', '37', '22', '18'],
|
||||
'webm': ['46', '45', '44', '43'],
|
||||
}
|
||||
_video_extensions = {
|
||||
'13': '3gp',
|
||||
'17': '3gp',
|
||||
@@ -236,54 +219,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
'248': 'webm',
|
||||
}
|
||||
_video_dimensions = {
|
||||
'5': '400x240',
|
||||
'6': '???',
|
||||
'13': '???',
|
||||
'17': '176x144',
|
||||
'18': '640x360',
|
||||
'22': '1280x720',
|
||||
'34': '640x360',
|
||||
'35': '854x480',
|
||||
'36': '320x240',
|
||||
'37': '1920x1080',
|
||||
'38': '4096x3072',
|
||||
'43': '640x360',
|
||||
'44': '854x480',
|
||||
'45': '1280x720',
|
||||
'46': '1920x1080',
|
||||
'82': '360p',
|
||||
'83': '480p',
|
||||
'84': '720p',
|
||||
'85': '1080p',
|
||||
'92': '240p',
|
||||
'93': '360p',
|
||||
'94': '480p',
|
||||
'95': '720p',
|
||||
'96': '1080p',
|
||||
'100': '360p',
|
||||
'101': '480p',
|
||||
'102': '720p',
|
||||
'132': '240p',
|
||||
'151': '72p',
|
||||
'133': '240p',
|
||||
'134': '360p',
|
||||
'135': '480p',
|
||||
'136': '720p',
|
||||
'137': '1080p',
|
||||
'138': '>1080p',
|
||||
'139': '48k',
|
||||
'140': '128k',
|
||||
'141': '256k',
|
||||
'160': '192p',
|
||||
'171': '128k',
|
||||
'172': '256k',
|
||||
'242': '240p',
|
||||
'243': '360p',
|
||||
'244': '480p',
|
||||
'245': '480p',
|
||||
'246': '480p',
|
||||
'247': '720p',
|
||||
'248': '1080p',
|
||||
'5': {'width': 400, 'height': 240},
|
||||
'6': {},
|
||||
'13': {},
|
||||
'17': {'width': 176, 'height': 144},
|
||||
'18': {'width': 640, 'height': 360},
|
||||
'22': {'width': 1280, 'height': 720},
|
||||
'34': {'width': 640, 'height': 360},
|
||||
'35': {'width': 854, 'height': 480},
|
||||
'36': {'width': 320, 'height': 240},
|
||||
'37': {'width': 1920, 'height': 1080},
|
||||
'38': {'width': 4096, 'height': 3072},
|
||||
'43': {'width': 640, 'height': 360},
|
||||
'44': {'width': 854, 'height': 480},
|
||||
'45': {'width': 1280, 'height': 720},
|
||||
'46': {'width': 1920, 'height': 1080},
|
||||
'82': {'height': 360, 'display': '360p'},
|
||||
'83': {'height': 480, 'display': '480p'},
|
||||
'84': {'height': 720, 'display': '720p'},
|
||||
'85': {'height': 1080, 'display': '1080p'},
|
||||
'92': {'height': 240, 'display': '240p'},
|
||||
'93': {'height': 360, 'display': '360p'},
|
||||
'94': {'height': 480, 'display': '480p'},
|
||||
'95': {'height': 720, 'display': '720p'},
|
||||
'96': {'height': 1080, 'display': '1080p'},
|
||||
'100': {'height': 360, 'display': '360p'},
|
||||
'101': {'height': 480, 'display': '480p'},
|
||||
'102': {'height': 720, 'display': '720p'},
|
||||
'132': {'height': 240, 'display': '240p'},
|
||||
'151': {'height': 72, 'display': '72p'},
|
||||
'133': {'height': 240, 'display': '240p'},
|
||||
'134': {'height': 360, 'display': '360p'},
|
||||
'135': {'height': 480, 'display': '480p'},
|
||||
'136': {'height': 720, 'display': '720p'},
|
||||
'137': {'height': 1080, 'display': '1080p'},
|
||||
'138': {'height': 1081, 'display': '>1080p'},
|
||||
'139': {'display': '48k'},
|
||||
'140': {'display': '128k'},
|
||||
'141': {'display': '256k'},
|
||||
'160': {'height': 192, 'display': '192p'},
|
||||
'171': {'display': '128k'},
|
||||
'172': {'display': '256k'},
|
||||
'242': {'height': 240, 'display': '240p'},
|
||||
'243': {'height': 360, 'display': '360p'},
|
||||
'244': {'height': 480, 'display': '480p'},
|
||||
'245': {'height': 480, 'display': '480p'},
|
||||
'246': {'height': 480, 'display': '480p'},
|
||||
'247': {'height': 720, 'display': '720p'},
|
||||
'248': {'height': 1080, 'display': '1080p'},
|
||||
}
|
||||
_special_itags = {
|
||||
'82': '3D',
|
||||
@@ -1153,13 +1136,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
|
||||
def _print_formats(self, formats):
|
||||
print('Available formats:')
|
||||
for x in formats:
|
||||
print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
|
||||
self._video_dimensions.get(x, '???'),
|
||||
' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
|
||||
|
||||
def _extract_id(self, url):
|
||||
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
|
||||
if mobj is None:
|
||||
@@ -1172,48 +1148,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
Transform a dictionary in the format {itag:url} to a list of (itag, url)
|
||||
with the requested formats.
|
||||
"""
|
||||
req_format = self._downloader.params.get('format', None)
|
||||
format_limit = self._downloader.params.get('format_limit', None)
|
||||
available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
|
||||
if format_limit is not None and format_limit in available_formats:
|
||||
format_list = available_formats[available_formats.index(format_limit):]
|
||||
else:
|
||||
format_list = available_formats
|
||||
existing_formats = [x for x in format_list if x in url_map]
|
||||
existing_formats = [x for x in self._available_formats if x in url_map]
|
||||
if len(existing_formats) == 0:
|
||||
raise ExtractorError(u'no known formats available for video')
|
||||
if self._downloader.params.get('listformats', None):
|
||||
self._print_formats(existing_formats)
|
||||
return
|
||||
if req_format is None or req_format == 'best':
|
||||
video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
|
||||
elif req_format == 'worst':
|
||||
video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
|
||||
elif req_format in ('-1', 'all'):
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
else:
|
||||
# Specific formats. We pick the first in a slash-delimeted sequence.
|
||||
# Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
|
||||
# available in the specified format. For example,
|
||||
# if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
|
||||
# if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
|
||||
# if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
|
||||
req_formats = req_format.split('/')
|
||||
video_url_list = None
|
||||
for rf in req_formats:
|
||||
if rf in url_map:
|
||||
video_url_list = [(rf, url_map[rf])]
|
||||
break
|
||||
if rf in self._video_formats_map:
|
||||
for srf in self._video_formats_map[rf]:
|
||||
if srf in url_map:
|
||||
video_url_list = [(srf, url_map[srf])]
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
if video_url_list is None:
|
||||
raise ExtractorError(u'requested format not available')
|
||||
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
|
||||
video_url_list.reverse() # order worst to best
|
||||
return video_url_list
|
||||
|
||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
||||
@@ -1462,50 +1401,67 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
|
||||
url += '&ratebypass=yes'
|
||||
url_map[url_data['itag'][0]] = url
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
elif video_info.get('hlsvp'):
|
||||
manifest_url = video_info['hlsvp'][0]
|
||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
||||
video_url_list = self._get_video_url_list(url_map)
|
||||
if not video_url_list:
|
||||
return
|
||||
|
||||
else:
|
||||
raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
results = []
|
||||
formats = []
|
||||
for itag, video_real_url in video_url_list:
|
||||
# Extension
|
||||
video_extension = self._video_extensions.get(itag, 'flv')
|
||||
resolution = self._video_dimensions.get(itag, {}).get('display')
|
||||
width = self._video_dimensions.get(itag, {}).get('width')
|
||||
height = self._video_dimensions.get(itag, {}).get('height')
|
||||
note = self._special_itags.get(itag)
|
||||
|
||||
video_format = '{0} - {1}{2}'.format(itag if itag else video_extension,
|
||||
self._video_dimensions.get(itag, '???'),
|
||||
'%dx%d' % (width, height) if width is not None and height is not None else (resolution if resolution is not None else '???'),
|
||||
' ('+self._special_itags[itag]+')' if itag in self._special_itags else '')
|
||||
|
||||
results.append({
|
||||
'id': video_id,
|
||||
'url': video_real_url,
|
||||
'uploader': video_uploader,
|
||||
'uploader_id': video_uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'ext': video_extension,
|
||||
'format': video_format,
|
||||
'format_id': itag,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'player_url': player_url,
|
||||
'subtitles': video_subtitles,
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
formats.append({
|
||||
'url': video_real_url,
|
||||
'ext': video_extension,
|
||||
'format': video_format,
|
||||
'format_id': itag,
|
||||
'player_url': player_url,
|
||||
'_resolution': resolution,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'format_note': note,
|
||||
})
|
||||
return results
|
||||
|
||||
def _formats_key(f):
|
||||
note = f.get('format_note')
|
||||
if note is None:
|
||||
note = u''
|
||||
is_dash = u'DASH' in note
|
||||
return (
|
||||
0 if is_dash else 1,
|
||||
f.get('height') if f.get('height') is not None else -1,
|
||||
f.get('width') if f.get('width') is not None else -1)
|
||||
formats.sort(key=_formats_key)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'uploader_id': video_uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'title': video_title,
|
||||
'thumbnail': video_thumbnail,
|
||||
'description': video_description,
|
||||
'subtitles': video_subtitles,
|
||||
'duration': video_duration,
|
||||
'age_limit': 18 if age_gate else 0,
|
||||
'annotations': video_annotations,
|
||||
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = u'YouTube.com playlists'
|
||||
@@ -1717,7 +1673,7 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# page by page until there are no video ids - it means we got
|
||||
# all of them.
|
||||
|
||||
video_ids = []
|
||||
url_results = []
|
||||
|
||||
for pagenum in itertools.count(0):
|
||||
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
|
||||
@@ -1735,10 +1691,17 @@ class YoutubeUserIE(InfoExtractor):
|
||||
break
|
||||
|
||||
# Extract video identifiers
|
||||
ids_in_page = []
|
||||
for entry in response['feed']['entry']:
|
||||
ids_in_page.append(entry['id']['$t'].split('/')[-1])
|
||||
video_ids.extend(ids_in_page)
|
||||
entries = response['feed']['entry']
|
||||
for entry in entries:
|
||||
title = entry['title']['$t']
|
||||
video_id = entry['id']['$t'].split('/')[-1]
|
||||
url_results.append({
|
||||
'_type': 'url',
|
||||
'url': video_id,
|
||||
'ie_key': 'Youtube',
|
||||
'id': 'video_id',
|
||||
'title': title,
|
||||
})
|
||||
|
||||
# A little optimization - if current page is not
|
||||
# "full", ie. does not contain PAGE_SIZE video ids then
|
||||
@@ -1746,12 +1709,9 @@ class YoutubeUserIE(InfoExtractor):
|
||||
# are no more ids on further pages - no need to query
|
||||
# again.
|
||||
|
||||
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
|
||||
if len(entries) < self._GDATA_PAGE_SIZE:
|
||||
break
|
||||
|
||||
url_results = [
|
||||
self.url_result(video_id, 'Youtube', video_id=video_id)
|
||||
for video_id in video_ids]
|
||||
return self.playlist_result(url_results, playlist_title=username)
|
||||
|
||||
|
||||
|
@@ -767,6 +767,10 @@ def unified_strdate(date_str):
|
||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||
except:
|
||||
pass
|
||||
if upload_date is None:
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
return upload_date
|
||||
|
||||
def determine_ext(url, default_ext=u'unknown_video'):
|
||||
@@ -1087,7 +1091,10 @@ def remove_start(s, start):
|
||||
|
||||
|
||||
def url_basename(url):
|
||||
m = re.match(r'(?:https?:|)//[^/]+/(?:[^/?#]+/)?([^/?#]+)/?(?:[?#]|$)', url)
|
||||
if not m:
|
||||
return u''
|
||||
return m.group(1)
|
||||
path = compat_urlparse.urlparse(url).path
|
||||
return path.strip(u'/').split(u'/')[-1]
|
||||
|
||||
|
||||
class HEADRequest(compat_urllib_request.Request):
|
||||
def get_method(self):
|
||||
return "HEAD"
|
||||
|
@@ -1,2 +1,2 @@
|
||||
|
||||
__version__ = '2013.12.17.1'
|
||||
__version__ = '2013.12.23.4'
|
||||
|
Reference in New Issue
Block a user