youtube-dl/youtube_dl/__init__.py

439 lines
16 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- coding: utf-8 -*-
2012-01-05 08:36:47 +09:00
# Credited authors of youtube-dl, one name per entry.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
    'Christian Albrecht',
    'Dave Vasilevsky',
    'Jaime Marquínez Ferrándiz',
    'Jeff Crouse',
    'Osama Khalid',
    'Michael Walter',
    'M. Yasoob Ullah Khalid',
    'Julien Fraichard',
    'Johny Mo Swag',
    'Axel Noack',
    'Albert Kim',
    'Pierre Rudloff',
    'Huarong Huo',
    'Ismael Mejía',
    'Steffan \'Ruirize\' James',
    'Andras Elso',
    'Jelle van der Waa',
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
    'Sergey M.',
    'Michael Orlitzky',
    'Chris Gahan',
    'Saimadhav Heblikar',
    'Mike Col',
    'Oleg Prutz',
    'pulpe',
    'Andreas Schmitz',
    'Michael Kaiser',
    'Niklas Laxström',
    'David Triendl',
    'Anthony Weems',
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
    'Sainyam Kapoor',
    'Nicolas Évrard',
    'Jason Normore',
    'Hoje Lee',
    'Adam Thalhammer',
    'Georg Jähnig',
    'Ralf Haring',
    'Koki Takahashi',
    'Ariset Llerena',
    'Adam Malcontenti-Wilson',
    'Tobias Bell',
    'Naglis Jonaitis',
    'Charles Chen',
    'Hassaan Ali',
    'Dobrosław Żybort',
    'David Fabijan',
    'Sebastian Haas',
    'Alexander Kirk',
    'Erik Johnson',
    'Keith Beckman',
    'Ole Ernst',
    'Aaron McDaniel (mcd1992)',
    'Magnus Kolstad',
    'Hari Padmanaban',
    'Carlos Ramos',
    '5moufl',
    'lenaten',
    'Dennis Scheiba',
    'Damon Timm',
)

__license__ = 'Public Domain'
import codecs
2014-02-26 07:31:16 +09:00
import io
import os
import random
import sys
2014-09-13 14:58:44 +09:00
from .options import (
parseOpts,
)
2013-10-15 09:07:26 +09:00
from .utils import (
2014-03-18 22:27:42 +09:00
compat_getpass,
2013-10-15 09:07:26 +09:00
compat_print,
DateRange,
DEFAULT_OUTTMPL,
2013-10-15 09:07:26 +09:00
decodeOption,
DownloadError,
MaxDownloadsReached,
preferredencoding,
read_batch_urls,
2013-10-15 09:07:26 +09:00
SameFileError,
setproctitle,
2013-10-15 09:07:26 +09:00
std_headers,
write_string,
)
2012-12-31 03:49:14 +09:00
from .update import update_self
from .downloader import (
2013-10-15 09:07:26 +09:00
FileDownloader,
)
from .extractor import gen_extractors
from .YoutubeDL import YoutubeDL
from .postprocessor import (
2014-02-22 21:55:51 +09:00
AtomicParsleyPP,
FFmpegAudioFixPP,
2013-10-15 09:07:26 +09:00
FFmpegMetadataPP,
FFmpegVideoConvertor,
FFmpegExtractAudioPP,
FFmpegEmbedSubtitlePP,
XAttrMetadataPP,
ExecAfterDownloadPP,
2013-10-15 09:07:26 +09:00
)
def _real_main(argv=None):
    """Run the command-line program: parse options, then download.

    The function parses ``argv`` with ``parseOpts``, applies header
    overrides to ``std_headers``, gathers URLs from the batch file and the
    positional arguments, validates and normalises the options, builds the
    option dictionary for ``YoutubeDL``, registers the requested
    post-processors and finally starts the download.

    argv -- argument list handed to ``parseOpts`` (``None`` is passed
            through unchanged).

    Always terminates through ``sys.exit`` (or ``parser.error``); it never
    returns normally. Raises ``ValueError`` for an invalid playlist
    start/end combination.
    """
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    setproctitle(u'youtube-dl')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            if h.find(':', 1) < 0:
                parser.error(u'wrong header formatting, it should be key:value, not "%s"' % h)
            # Split on the first colon only: header values routinely contain
            # colons themselves (URLs, timestamps), and a larger maxsplit
            # would yield more than two parts and break the unpacking.
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string(u'[debug] Adding header from command line option %s:%s\n' % (key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        compat_print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batch_urls + args
    all_urls = [url.strip() for url in all_urls]
    _enc = preferredencoding()
    # Command-line arguments may be byte strings; decode them to text.
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                compat_print(u' ' + mu)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            # An extractor opts out of the listing with IE_DESC = False.
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (
                    u'cute kittens', u'slithering pythons', u'falling cat',
                    u'angry poodle', u'purple fish', u'running tortoise',
                    u'sleeping bunny')
                _COUNTS = (u'', u'5', u'10', u'all')
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error(u'using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error(u'using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = compat_getpass(u'Type account password and press [Return]: ')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error(u'invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error(u'invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    if opts.playliststart <= 0:
        raise ValueError(u'Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError(u'Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error(u'invalid audio format specified')
    if opts.audioquality:
        # Accept values such as "128k" / "128K" by dropping the suffix.
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error(u'invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
            parser.error(u'invalid video recode format specified')
    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and (opts.writeautomaticsub == False):
        opts.writesubtitles = True

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
    # The first truthy alternative wins; DEFAULT_OUTTMPL is the fallback.
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
               or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
               or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
               or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
               or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
               or (opts.useid and u'%(id)s.%(ext)s')
               or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
               or DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error(u'Cannot download a video and extract audio into the same'
                     u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     u' template'.format(outtmpl))

    # Any "print this field" option implies quiet mode and no download
    # (see the 'quiet' and 'skip_download' entries below).
    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson

    download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or any_printing),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'noplaylist': opts.noplaylist,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'exec_cmd': opts.exec_cmd,
        'extract_flat': opts.extract_flat,
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.print_debug_header()
        ydl.add_default_info_extractors()

        # PostProcessors
        # Add the metadata pp first, the other pps will copy it
        if opts.addmetadata:
            ydl.add_post_processor(FFmpegMetadataPP())
        if opts.extractaudio:
            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
        if opts.recodevideo:
            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
        if opts.embedsubtitles:
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
        if opts.embedthumbnail:
            if not opts.addmetadata:
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())

        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
        if opts.exec_cmd:
            ydl.add_post_processor(ExecAfterDownloadPP(
                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))

        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
            if not (opts.update_self or opts.rm_cachedir):
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(opts.load_info_filename)
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen(u'--max-download limit reached, aborting.')
            retcode = 101

    sys.exit(retcode)
def main(argv=None):
    """Entry point: run ``_real_main`` and map known failures to exit statuses.

    argv -- argument list forwarded unchanged to ``_real_main``.

    A ``DownloadError`` exits with status 1; ``SameFileError`` and
    ``KeyboardInterrupt`` exit with an error message as the status.
    Any other exception propagates to the caller.
    """
    try:
        _real_main(argv)
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'
    else:
        # Nothing was caught, so there is nothing to translate.
        return
    sys.exit(exit_status)