Option to determine extractors ahead of time

Sometimes I just want to know whether youtube-dl can be expected to handle a given URL.  This option accomplishes that quickly.  Previously, I would run youtube-dl with --simulate or --skip-download, but these would take 5+ seconds on my system before returning.  The --determine-extractors option of this PR, however, takes only 1.3 to 2.6 seconds.  You can use it, for example, to handle arbitrary URLs intelligently:  if --determine-extractors indicates success (exit status 0), run youtube-dl (or mpv or whatever).  Otherwise, run $BROWSER.

Thank you.
This commit is contained in:
Friendly 2021-10-10 22:12:04 -04:00
parent a803582717
commit df1852a74e
2 changed files with 22 additions and 0 deletions

View File

@ -9,6 +9,7 @@ import codecs
import io
import os
import random
import re
import sys
@ -123,6 +124,23 @@ def _real_main(argv=None):
table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
sys.exit(0)
if opts.determine_extractors:
status = 1
for url in all_urls:
if re.match(r'^[^\s/]+\.[^\s/]+/', url): # generic.py
write_string('The url doesn\'t specify the protocol, trying with http\n', out=sys.stderr)
url = 'http://' + url
for ie in list_extractors(opts.age_limit):
if not ie._WORKING or ie.IE_NAME == 'generic':
continue
try:
if re.match(getattr(ie, '_VALID_URL'), url):
write_string(ie.IE_NAME + ' ' + url + '\n', out=sys.stdout)
status = 0
break
except AttributeError:
pass
sys.exit(status)
# Conflicting, missing and erroneous options
if opts.usenetrc and (opts.username is not None or opts.password is not None):

View File

@ -161,6 +161,10 @@ def parseOpts(overrideArguments=None):
'--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions', default=False,
help='Output descriptions of all supported extractors')
general.add_option(
'--determine-extractors',
action='store_true', dest='determine_extractors', default=False,
help='List the extractor that would be used for each URL. Exit status indicates at least one successful match.')
general.add_option(
'--force-generic-extractor',
action='store_true', dest='force_generic_extractor', default=False,