mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-17 13:48:37 +09:00
Merge branch 'master' into extractor/arte/improvement-switch-to-config-api-v2
This commit is contained in:
@@ -73,6 +73,7 @@ from .utils import (
|
||||
PostProcessingError,
|
||||
preferredencoding,
|
||||
prepend_extension,
|
||||
process_communicate_or_kill,
|
||||
register_socks_protocols,
|
||||
render_table,
|
||||
replace_extension,
|
||||
@@ -720,7 +721,7 @@ class YoutubeDL(object):
|
||||
filename = encodeFilename(filename, True).decode(preferredencoding())
|
||||
return sanitize_path(filename)
|
||||
except ValueError as err:
|
||||
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
|
||||
return None
|
||||
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
@@ -1529,7 +1530,7 @@ class YoutubeDL(object):
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
try:
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
|
||||
except (ValueError, OverflowError, OSError):
|
||||
pass
|
||||
|
||||
@@ -1569,9 +1570,6 @@ class YoutubeDL(object):
|
||||
else:
|
||||
formats = info_dict['formats']
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
def is_wellformed(f):
|
||||
url = f.get('url')
|
||||
if not url:
|
||||
@@ -1584,7 +1582,10 @@ class YoutubeDL(object):
|
||||
return True
|
||||
|
||||
# Filter out malformed formats for better extraction robustness
|
||||
formats = list(filter(is_wellformed, formats))
|
||||
formats = list(filter(is_wellformed, formats or []))
|
||||
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found!')
|
||||
|
||||
formats_dict = {}
|
||||
|
||||
@@ -1778,10 +1779,9 @@ class YoutubeDL(object):
|
||||
|
||||
assert info_dict.get('_type', 'video') == 'video'
|
||||
|
||||
max_downloads = self.params.get('max_downloads')
|
||||
if max_downloads is not None:
|
||||
if self._num_downloads >= int(max_downloads):
|
||||
raise MaxDownloadsReached()
|
||||
max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
|
||||
if self._num_downloads >= max_downloads:
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
# TODO: backward compatibility, to be removed
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
@@ -1906,8 +1906,17 @@ class YoutubeDL(object):
|
||||
|
||||
if not self.params.get('skip_download', False):
|
||||
try:
|
||||
def checked_get_suitable_downloader(info_dict, params):
|
||||
ed_args = params.get('external_downloader_args')
|
||||
dler = get_suitable_downloader(info_dict, params)
|
||||
if ed_args and not params.get('external_downloader_args'):
|
||||
# external_downloader_args was cleared because external_downloader was rejected
|
||||
self.report_warning('Requested external downloader cannot be used: '
|
||||
'ignoring --external-downloader-args.')
|
||||
return dler
|
||||
|
||||
def dl(name, info):
|
||||
fd = get_suitable_downloader(info, self.params)(self, self.params)
|
||||
fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
@@ -2049,9 +2058,12 @@ class YoutubeDL(object):
|
||||
try:
|
||||
self.post_process(filename, info_dict)
|
||||
except (PostProcessingError) as err:
|
||||
self.report_error('postprocessing: %s' % str(err))
|
||||
self.report_error('postprocessing: %s' % error_to_compat_str(err))
|
||||
return
|
||||
self.record_download_archive(info_dict)
|
||||
# avoid possible nugatory search for further items (PR #26638)
|
||||
if self._num_downloads >= max_downloads:
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
def download(self, url_list):
|
||||
"""Download a given list of URLs."""
|
||||
@@ -2314,7 +2326,7 @@ class YoutubeDL(object):
|
||||
['git', 'rev-parse', '--short', 'HEAD'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
||||
cwd=os.path.dirname(os.path.abspath(__file__)))
|
||||
out, err = sp.communicate()
|
||||
out, err = process_communicate_or_kill(sp)
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
||||
|
@@ -8,6 +8,18 @@ from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
BLOCK_SIZE_BYTES = 16


def pkcs7_padding(data):
    """
    PKCS#7 padding

    Appends N copies of the value N, where N (1..BLOCK_SIZE_BYTES) is the
    number of values needed to reach the next multiple of BLOCK_SIZE_BYTES.
    Input whose length is already a multiple of the block size (including
    empty input) therefore gains one complete block of padding.

    @param {int[]} data        cleartext
    @returns {int[]}           cleartext followed by its padding (a new
                               list; the argument is not modified)
    """

    remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
    return data + [remaining_length] * remaining_length
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, counter):
|
||||
"""
|
||||
Decrypt with aes in counter mode
|
||||
@@ -76,8 +88,7 @@ def aes_cbc_encrypt(data, key, iv):
|
||||
previous_cipher_block = iv
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
remaining_length = BLOCK_SIZE_BYTES - len(block)
|
||||
block += [remaining_length] * remaining_length
|
||||
block = pkcs7_padding(block)
|
||||
mixed_block = xor(block, previous_cipher_block)
|
||||
|
||||
encrypted_block = aes_encrypt(mixed_block, expanded_key)
|
||||
@@ -88,6 +99,28 @@ def aes_cbc_encrypt(data, key, iv):
|
||||
return encrypted_data
|
||||
|
||||
|
||||
def aes_ecb_encrypt(data, key):
    """
    Encrypt with aes in ECB mode. Using PKCS#7 padding

    The cleartext is cut into BLOCK_SIZE_BYTES-sized blocks and every block
    is encrypted on its own with the same expanded key. Only a short final
    block receives padding values: when len(data) is a multiple of the block
    size no extra padding block is appended, and empty input produces empty
    output.

    @param {int[]} data        cleartext
    @param {int[]} key         16/24/32-Byte cipher key
    @returns {int[]}           encrypted data
    """
    expanded_key = key_expansion(key)
    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    encrypted_data = []
    for i in range(block_count):
        block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
        # pkcs7_padding() extends an already full block by a whole block of
        # padding values; keep only the first BLOCK_SIZE_BYTES so that the
        # cipher is always handed exactly one block
        block = pkcs7_padding(block)[:BLOCK_SIZE_BYTES]

        encrypted_data += aes_encrypt(block, expanded_key)

    return encrypted_data
|
||||
|
||||
|
||||
def key_expansion(data):
|
||||
"""
|
||||
Generate key schedule
|
||||
@@ -303,7 +336,7 @@ def xor(data1, data2):
|
||||
|
||||
|
||||
def rijndael_mul(a, b):
    """
    Multiply two values using the Rijndael log/exp lookup tables
    (presumably multiplication in AES's GF(2^8); the tables themselves are
    defined elsewhere in this module)

    @param {int} a             first factor, used as index into RIJNDAEL_LOG_TABLE
    @param {int} b             second factor, used as index into RIJNDAEL_LOG_TABLE
    @returns {int}             0 if either factor is 0, else the
                               RIJNDAEL_EXP_TABLE entry at (log a + log b) mod 0xFF
    """
    # a zero factor is answered directly instead of through the log table
    if a == 0 or b == 0:
        return 0
    return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF]
|
||||
|
||||
|
@@ -10,12 +10,21 @@ import traceback
|
||||
|
||||
from .compat import compat_getenv
|
||||
from .utils import (
|
||||
error_to_compat_str,
|
||||
expand_path,
|
||||
is_outdated_version,
|
||||
try_get,
|
||||
write_json_file,
|
||||
)
|
||||
from .version import __version__
|
||||
|
||||
|
||||
class Cache(object):
|
||||
|
||||
_YTDL_DIR = 'youtube-dl'
|
||||
_VERSION_KEY = _YTDL_DIR + '_version'
|
||||
_DEFAULT_VERSION = '2021.12.17'
|
||||
|
||||
def __init__(self, ydl):
|
||||
self._ydl = ydl
|
||||
|
||||
@@ -23,7 +32,7 @@ class Cache(object):
|
||||
res = self._ydl.params.get('cachedir')
|
||||
if res is None:
|
||||
cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
|
||||
res = os.path.join(cache_root, 'youtube-dl')
|
||||
res = os.path.join(cache_root, self._YTDL_DIR)
|
||||
return expand_path(res)
|
||||
|
||||
def _get_cache_fn(self, section, key, dtype):
|
||||
@@ -50,13 +59,22 @@ class Cache(object):
|
||||
except OSError as ose:
|
||||
if ose.errno != errno.EEXIST:
|
||||
raise
|
||||
write_json_file(data, fn)
|
||||
write_json_file({self._VERSION_KEY: __version__, 'data': data}, fn)
|
||||
except Exception:
|
||||
tb = traceback.format_exc()
|
||||
self._ydl.report_warning(
|
||||
'Writing cache to %r failed: %s' % (fn, tb))
|
||||
|
||||
def load(self, section, key, dtype='json', default=None):
|
||||
def _validate(self, data, min_ver):
    """Unwrap a loaded cache entry, discarding it if its version is too old.

    Entries are expected in the form {_VERSION_KEY: <version>, 'data': <payload>}.
    Anything without a version under _VERSION_KEY is treated as a bare
    payload and is assumed to have version _DEFAULT_VERSION.

    @param data     the decoded JSON content of the cache file
    @param min_ver  lowest acceptable version, or None/'' to accept any
                    (compared as '0')
    @returns        the payload, or None after reporting that the entry
                    was discarded as outdated (as judged by
                    is_outdated_version)
    """
    version = try_get(data, lambda x: x[self._VERSION_KEY])
    if not version:  # Backward compatibility
        data, version = {'data': data}, self._DEFAULT_VERSION
    if not is_outdated_version(version, min_ver or '0', assume_new=False):
        return data['data']
    self._ydl.to_screen(
        'Discarding old cache from version {version} (needs {min_ver})'.format(
            version=version, min_ver=min_ver))
    return None
|
||||
|
||||
def load(self, section, key, dtype='json', default=None, min_ver=None):
|
||||
assert dtype in ('json',)
|
||||
|
||||
if not self.enabled:
|
||||
@@ -66,12 +84,12 @@ class Cache(object):
|
||||
try:
|
||||
try:
|
||||
with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
|
||||
return json.load(cachef)
|
||||
return self._validate(json.load(cachef), min_ver)
|
||||
except ValueError:
|
||||
try:
|
||||
file_size = os.path.getsize(cache_fn)
|
||||
except (OSError, IOError) as oe:
|
||||
file_size = str(oe)
|
||||
file_size = error_to_compat_str(oe)
|
||||
self._ydl.report_warning(
|
||||
'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
|
||||
except IOError:
|
||||
|
1667
youtube_dl/casefold.py
Normal file
1667
youtube_dl/casefold.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -21,6 +21,28 @@ import subprocess
|
||||
import sys
|
||||
import xml.etree.ElementTree
|
||||
|
||||
# naming convention
|
||||
# 'compat_' + Python3_name.replace('.', '_')
|
||||
# other aliases exist for convenience and/or legacy
|
||||
|
||||
# deal with critical unicode/str things first
|
||||
try:
    # Python 2: these builtins exist; on Python 3 the first missing name
    # raises NameError before anything is assigned or imported
    compat_str, compat_basestring, compat_chr = (
        unicode, basestring, unichr
    )
    from .casefold import casefold as compat_casefold

except NameError:
    # Python 3: str is the only text type and has casefold() built in
    compat_str, compat_basestring, compat_chr = (
        str, str, chr
    )
    compat_casefold = lambda s: s.casefold()
|
||||
|
||||
try:
|
||||
import collections.abc as compat_collections_abc
|
||||
except ImportError:
|
||||
import collections as compat_collections_abc
|
||||
|
||||
try:
|
||||
import urllib.request as compat_urllib_request
|
||||
@@ -36,16 +58,15 @@ try:
|
||||
import urllib.parse as compat_urllib_parse
|
||||
except ImportError: # Python 2
|
||||
import urllib as compat_urllib_parse
|
||||
import urlparse as _urlparse
|
||||
for a in dir(_urlparse):
|
||||
if not hasattr(compat_urllib_parse, a):
|
||||
setattr(compat_urllib_parse, a, getattr(_urlparse, a))
|
||||
del _urlparse
|
||||
|
||||
try:
|
||||
from urllib.parse import urlparse as compat_urllib_parse_urlparse
|
||||
except ImportError: # Python 2
|
||||
from urlparse import urlparse as compat_urllib_parse_urlparse
|
||||
|
||||
try:
|
||||
import urllib.parse as compat_urlparse
|
||||
except ImportError: # Python 2
|
||||
import urlparse as compat_urlparse
|
||||
# unfavoured aliases
|
||||
compat_urlparse = compat_urllib_parse
|
||||
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
|
||||
|
||||
try:
|
||||
import urllib.response as compat_urllib_response
|
||||
@@ -56,6 +77,7 @@ try:
|
||||
import http.cookiejar as compat_cookiejar
|
||||
except ImportError: # Python 2
|
||||
import cookielib as compat_cookiejar
|
||||
compat_http_cookiejar = compat_cookiejar
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
||||
@@ -67,11 +89,13 @@ if sys.version_info[0] == 2:
|
||||
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
||||
else:
|
||||
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
|
||||
|
||||
try:
|
||||
import http.cookies as compat_cookies
|
||||
except ImportError: # Python 2
|
||||
import Cookie as compat_cookies
|
||||
compat_http_cookies = compat_cookies
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||
@@ -81,6 +105,7 @@ if sys.version_info[0] == 2:
|
||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||
else:
|
||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
||||
|
||||
try:
|
||||
import html.entities as compat_html_entities
|
||||
@@ -2334,16 +2359,19 @@ try:
|
||||
from urllib.error import HTTPError as compat_HTTPError
|
||||
except ImportError: # Python 2
|
||||
from urllib2 import HTTPError as compat_HTTPError
|
||||
compat_urllib_HTTPError = compat_HTTPError
|
||||
|
||||
try:
|
||||
from urllib.request import urlretrieve as compat_urlretrieve
|
||||
except ImportError: # Python 2
|
||||
from urllib import urlretrieve as compat_urlretrieve
|
||||
compat_urllib_request_urlretrieve = compat_urlretrieve
|
||||
|
||||
try:
|
||||
from html.parser import HTMLParser as compat_HTMLParser
|
||||
except ImportError: # Python 2
|
||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||
compat_html_parser_HTMLParser = compat_HTMLParser
|
||||
|
||||
try: # Python 2
|
||||
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||
@@ -2357,6 +2385,7 @@ except ImportError: # Python <3.4
|
||||
# and uniform cross-version exception handling
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||
|
||||
try:
|
||||
from subprocess import DEVNULL
|
||||
@@ -2369,15 +2398,12 @@ try:
|
||||
except ImportError:
|
||||
import BaseHTTPServer as compat_http_server
|
||||
|
||||
try:
|
||||
compat_str = unicode # Python 2
|
||||
except NameError:
|
||||
compat_str = str
|
||||
|
||||
try:
|
||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
except ImportError: # Python 2
|
||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||
else re.compile(r'([\x00-\x7f]+)'))
|
||||
@@ -2444,9 +2470,6 @@ except ImportError: # Python 2
|
||||
string = string.replace('+', ' ')
|
||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||
|
||||
try:
|
||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||
except ImportError: # Python 2
|
||||
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
|
||||
# Possible solutions are to either port it from python 3 with all
|
||||
# the friends or manually ensure input query contains only byte strings.
|
||||
@@ -2468,7 +2491,62 @@ except ImportError: # Python 2
|
||||
def encode_list(l):
|
||||
return [encode_elem(e) for e in l]
|
||||
|
||||
return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
|
||||
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
|
||||
|
||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||
# Python 2's version is apparently totally broken
|
||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||
encoding='utf-8', errors='replace'):
|
||||
qs, _coerce_result = qs, compat_str
|
||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||
r = []
|
||||
for name_value in pairs:
|
||||
if not name_value and not strict_parsing:
|
||||
continue
|
||||
nv = name_value.split('=', 1)
|
||||
if len(nv) != 2:
|
||||
if strict_parsing:
|
||||
raise ValueError('bad query field: %r' % (name_value,))
|
||||
# Handle case of a control-name with no equal sign
|
||||
if keep_blank_values:
|
||||
nv.append('')
|
||||
else:
|
||||
continue
|
||||
if len(nv[1]) or keep_blank_values:
|
||||
name = nv[0].replace('+', ' ')
|
||||
name = compat_urllib_parse_unquote(
|
||||
name, encoding=encoding, errors=errors)
|
||||
name = _coerce_result(name)
|
||||
value = nv[1].replace('+', ' ')
|
||||
value = compat_urllib_parse_unquote(
|
||||
value, encoding=encoding, errors=errors)
|
||||
value = _coerce_result(value)
|
||||
r.append((name, value))
|
||||
return r
|
||||
|
||||
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse a query string into a dict mapping each name to its list of values.

    All arguments are passed on to _parse_qsl(); values of a repeated name
    are collected in the order in which they occur.

    @returns  dict of name -> list of values
    """
    parsed_result = {}
    for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                  encoding=encoding, errors=errors):
        parsed_result.setdefault(name, []).append(value)
    return parsed_result
|
||||
|
||||
# keep the stock urlencode reachable as _urlencode before the name is
# re-pointed at the compat wrapper below
setattr(compat_urllib_parse, '_urlencode',
        getattr(compat_urllib_parse, 'urlencode'))
# install the fixed implementations on the compat_urllib_parse module
for name, fix in (
        ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
        # NOTE(review): unlike the other entries, 'parse_unquote' is not the
        # name of any urllib.parse function; presumably 'unquote' was
        # intended. Left as is because renaming it would replace the
        # module's own unquote - confirm before changing.
        ('parse_unquote', compat_urllib_parse_unquote),
        ('unquote_plus', compat_urllib_parse_unquote_plus),
        ('urlencode', compat_urllib_parse_urlencode),
        ('parse_qs', compat_parse_qs)):
    setattr(compat_urllib_parse, name, fix)
|
||||
|
||||
compat_urllib_parse_parse_qs = compat_parse_qs
|
||||
|
||||
try:
|
||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||
@@ -2504,21 +2582,11 @@ except ImportError: # Python < 3.4
|
||||
|
||||
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
|
||||
|
||||
try:
|
||||
compat_basestring = basestring # Python 2
|
||||
except NameError:
|
||||
compat_basestring = str
|
||||
|
||||
try:
|
||||
compat_chr = unichr # Python 2
|
||||
except NameError:
|
||||
compat_chr = chr
|
||||
|
||||
try:
|
||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
|
||||
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
@@ -2532,10 +2600,11 @@ try:
|
||||
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||
# the following will crash with:
|
||||
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||
isinstance(None, xml.etree.ElementTree.Element)
|
||||
isinstance(None, etree.Element)
|
||||
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||
except TypeError: # Python <=2.6
|
||||
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||
compat_xml_etree_ElementTree_Element = compat_etree_Element
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
def compat_etree_fromstring(text):
|
||||
@@ -2591,6 +2660,7 @@ else:
|
||||
if k == uri or v == prefix:
|
||||
del etree._namespace_map[k]
|
||||
etree._namespace_map[uri] = prefix
|
||||
compat_xml_etree_register_namespace = compat_etree_register_namespace
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||
@@ -2602,53 +2672,6 @@ if sys.version_info < (2, 7):
|
||||
else:
|
||||
compat_xpath = lambda xpath: xpath
|
||||
|
||||
try:
|
||||
from urllib.parse import parse_qs as compat_parse_qs
|
||||
except ImportError: # Python 2
|
||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||
# Python 2's version is apparently totally broken
|
||||
|
||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||
encoding='utf-8', errors='replace'):
|
||||
qs, _coerce_result = qs, compat_str
|
||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||
r = []
|
||||
for name_value in pairs:
|
||||
if not name_value and not strict_parsing:
|
||||
continue
|
||||
nv = name_value.split('=', 1)
|
||||
if len(nv) != 2:
|
||||
if strict_parsing:
|
||||
raise ValueError('bad query field: %r' % (name_value,))
|
||||
# Handle case of a control-name with no equal sign
|
||||
if keep_blank_values:
|
||||
nv.append('')
|
||||
else:
|
||||
continue
|
||||
if len(nv[1]) or keep_blank_values:
|
||||
name = nv[0].replace('+', ' ')
|
||||
name = compat_urllib_parse_unquote(
|
||||
name, encoding=encoding, errors=errors)
|
||||
name = _coerce_result(name)
|
||||
value = nv[1].replace('+', ' ')
|
||||
value = compat_urllib_parse_unquote(
|
||||
value, encoding=encoding, errors=errors)
|
||||
value = _coerce_result(value)
|
||||
r.append((name, value))
|
||||
return r
|
||||
|
||||
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
||||
encoding='utf-8', errors='replace'):
|
||||
parsed_result = {}
|
||||
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
||||
encoding=encoding, errors=errors)
|
||||
for name, value in pairs:
|
||||
if name in parsed_result:
|
||||
parsed_result[name].append(value)
|
||||
else:
|
||||
parsed_result[name] = [value]
|
||||
return parsed_result
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
@@ -2773,6 +2796,8 @@ else:
|
||||
else:
|
||||
compat_expanduser = os.path.expanduser
|
||||
|
||||
compat_os_path_expanduser = compat_expanduser
|
||||
|
||||
|
||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
# os.path.realpath on Windows does not follow symbolic links
|
||||
@@ -2784,6 +2809,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
else:
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
compat_os_path_realpath = compat_realpath
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
def compat_print(s):
|
||||
@@ -2804,11 +2831,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
|
||||
else:
|
||||
compat_getpass = getpass.getpass
|
||||
|
||||
compat_getpass_getpass = compat_getpass
|
||||
|
||||
|
||||
try:
|
||||
compat_input = raw_input
|
||||
except NameError: # Python 3
|
||||
compat_input = input
|
||||
|
||||
|
||||
# Python < 2.6.5 require kwargs to be bytes
|
||||
try:
|
||||
def _testfunc(x):
|
||||
@@ -2886,6 +2917,7 @@ else:
|
||||
_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
|
||||
|
||||
def compat_get_terminal_size(fallback=(80, 24)):
|
||||
from .utils import process_communicate_or_kill
|
||||
columns = compat_getenv('COLUMNS')
|
||||
if columns:
|
||||
columns = int(columns)
|
||||
@@ -2902,7 +2934,7 @@ else:
|
||||
sp = subprocess.Popen(
|
||||
['stty', 'size'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = sp.communicate()
|
||||
out, err = process_communicate_or_kill(sp)
|
||||
_lines, _columns = map(int, out.split())
|
||||
except Exception:
|
||||
_columns, _lines = _terminal_size(*fallback)
|
||||
@@ -2913,15 +2945,16 @@ else:
|
||||
lines = _lines
|
||||
return _terminal_size(columns, lines)
|
||||
|
||||
|
||||
try:
|
||||
itertools.count(start=0, step=1)
|
||||
compat_itertools_count = itertools.count
|
||||
except TypeError: # Python 2.6
|
||||
def compat_itertools_count(start=0, step=1):
    """Endless generator of start, start + step, start + 2 * step, ...

    Stand-in for itertools.count(start, step) where that call does not
    accept these arguments.
    """
    while True:
        yield start
        start += step
|
||||
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
from tokenize import tokenize as compat_tokenize_tokenize
|
||||
@@ -2962,6 +2995,24 @@ else:
|
||||
compat_Struct = struct.Struct
|
||||
|
||||
|
||||
# compat_map/filter() returning an iterator, supposedly the
# same versioning as for zip below
# each name is taken from the first source that can be imported:
# future_builtins, then itertools (imap/ifilter), then the builtin itself
try:
    from future_builtins import map as compat_map
except ImportError:
    try:
        from itertools import imap as compat_map
    except ImportError:
        compat_map = map

try:
    from future_builtins import filter as compat_filter
except ImportError:
    try:
        from itertools import ifilter as compat_filter
    except ImportError:
        compat_filter = filter
|
||||
|
||||
try:
|
||||
from future_builtins import zip as compat_zip
|
||||
except ImportError: # not 2.6+ or is 3.x
|
||||
@@ -2971,6 +3022,82 @@ except ImportError: # not 2.6+ or is 3.x
|
||||
compat_zip = zip
|
||||
|
||||
|
||||
# method renamed between Py2/3
|
||||
try:
|
||||
from itertools import zip_longest as compat_itertools_zip_longest
|
||||
except ImportError:
|
||||
from itertools import izip_longest as compat_itertools_zip_longest
|
||||
|
||||
|
||||
# new class in collections
|
||||
try:
|
||||
from collections import ChainMap as compat_collections_chain_map
|
||||
# Py3.3's ChainMap is deficient
|
||||
if sys.version_info < (3, 4):
|
||||
raise ImportError
|
||||
except ImportError:
|
||||
# Py <= 3.3
|
||||
class compat_collections_chain_map(compat_collections_abc.MutableMapping):
    """Stand-in for collections.ChainMap.

    Presents several mappings as one: lookups search self.maps from first
    to last and return the first hit, while writes and deletions only ever
    touch the first mapping.
    """

    # class-level default only; __init__ always gives the instance its own list
    maps = [{}]

    def __init__(self, *maps):
        self.maps = list(maps) or [{}]

    def __getitem__(self, k):
        for m in self.maps:
            if k in m:
                return m[k]
        raise KeyError(k)

    def __setitem__(self, k, v):
        self.maps[0][k] = v

    def __contains__(self, k):
        return any((k in m) for m in self.maps)

    def __delitem__(self, k):
        if k not in self.maps[0]:
            raise KeyError(k)
        del self.maps[0][k]

    def __iter__(self):
        # walk the maps last to first, as ChainMap does, but report a key
        # found in several maps only once
        seen = set()
        for m in reversed(self.maps):
            for k in m:
                if k not in seen:
                    seen.add(k)
                    yield k

    def __len__(self):
        # number of distinct keys over all maps
        return len(set().union(*self.maps))

    # to match Py3, only the first map is consulted: a key that exists
    # solely in a parent map yields the default (or KeyError without one)
    def pop(self, k, *args):
        first = self.maps[0]
        if k in first:
            off = first[k]
            del first[k]
            return off
        elif len(args) > 0:
            return args[0]
        raise KeyError(k)

    def new_child(self, m=None, **kwargs):
        """Return a new chain with m (default: a fresh dict), updated with
        kwargs, placed in front of the current maps."""
        # test against None so that a caller-supplied empty mapping is
        # used as given rather than being swapped for a new dict
        if m is None:
            m = {}
        m.update(kwargs)
        return compat_collections_chain_map(m, *self.maps)

    @property
    def parents(self):
        """A new chain made of every map except the first."""
        return compat_collections_chain_map(*(self.maps[1:]))
|
||||
|
||||
|
||||
# Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?)
|
||||
compat_re_Pattern = type(re.compile(''))
|
||||
# and on the type of a match
|
||||
compat_re_Match = type(re.match('a', 'a'))
|
||||
|
||||
|
||||
if sys.version_info < (3, 3):
|
||||
def compat_b64decode(s, *args, **kwargs):
|
||||
if isinstance(s, compat_str):
|
||||
@@ -2979,6 +3106,8 @@ if sys.version_info < (3, 3):
|
||||
else:
|
||||
compat_b64decode = base64.b64decode
|
||||
|
||||
compat_base64_b64decode = compat_b64decode
|
||||
|
||||
|
||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
||||
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
||||
@@ -2998,26 +3127,53 @@ else:
|
||||
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||
|
||||
|
||||
__all__ = [
|
||||
legacy = [
|
||||
'compat_HTMLParseError',
|
||||
'compat_HTMLParser',
|
||||
'compat_HTTPError',
|
||||
'compat_Struct',
|
||||
'compat_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
'compat_cookies_SimpleCookie',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
'compat_getpass',
|
||||
'compat_parse_qs',
|
||||
'compat_realpath',
|
||||
'compat_urllib_parse_parse_qs',
|
||||
'compat_urllib_parse_unquote',
|
||||
'compat_urllib_parse_unquote_plus',
|
||||
'compat_urllib_parse_unquote_to_bytes',
|
||||
'compat_urllib_parse_urlencode',
|
||||
'compat_urllib_parse_urlparse',
|
||||
'compat_urlparse',
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
]
|
||||
|
||||
|
||||
__all__ = [
|
||||
'compat_html_parser_HTMLParseError',
|
||||
'compat_html_parser_HTMLParser',
|
||||
'compat_Struct',
|
||||
'compat_base64_b64decode',
|
||||
'compat_basestring',
|
||||
'compat_casefold',
|
||||
'compat_chr',
|
||||
'compat_collections_abc',
|
||||
'compat_collections_chain_map',
|
||||
'compat_http_cookiejar',
|
||||
'compat_http_cookiejar_Cookie',
|
||||
'compat_http_cookies',
|
||||
'compat_http_cookies_SimpleCookie',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_fromstring',
|
||||
'compat_filter',
|
||||
'compat_get_terminal_size',
|
||||
'compat_getenv',
|
||||
'compat_getpass',
|
||||
'compat_getpass_getpass',
|
||||
'compat_html_entities',
|
||||
'compat_html_entities_html5',
|
||||
'compat_http_client',
|
||||
@@ -3025,13 +3181,17 @@ __all__ = [
|
||||
'compat_input',
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_itertools_zip_longest',
|
||||
'compat_kwargs',
|
||||
'compat_map',
|
||||
'compat_numeric_types',
|
||||
'compat_ord',
|
||||
'compat_os_name',
|
||||
'compat_parse_qs',
|
||||
'compat_os_path_expanduser',
|
||||
'compat_os_path_realpath',
|
||||
'compat_print',
|
||||
'compat_realpath',
|
||||
'compat_re_Match',
|
||||
'compat_re_Pattern',
|
||||
'compat_setenv',
|
||||
'compat_shlex_quote',
|
||||
'compat_shlex_split',
|
||||
@@ -3043,17 +3203,14 @@ __all__ = [
|
||||
'compat_tokenize_tokenize',
|
||||
'compat_urllib_error',
|
||||
'compat_urllib_parse',
|
||||
'compat_urllib_parse_unquote',
|
||||
'compat_urllib_parse_unquote_plus',
|
||||
'compat_urllib_parse_unquote_to_bytes',
|
||||
'compat_urllib_parse_urlencode',
|
||||
'compat_urllib_parse_urlparse',
|
||||
'compat_urllib_request',
|
||||
'compat_urllib_request_DataHandler',
|
||||
'compat_urllib_response',
|
||||
'compat_urlparse',
|
||||
'compat_urlretrieve',
|
||||
'compat_xml_parse_error',
|
||||
'compat_urllib_request_urlretrieve',
|
||||
'compat_urllib_HTTPError',
|
||||
'compat_xml_etree_ElementTree_Element',
|
||||
'compat_xml_etree_ElementTree_ParseError',
|
||||
'compat_xml_etree_register_namespace',
|
||||
'compat_xpath',
|
||||
'compat_zip',
|
||||
'workaround_optparse_bug9161',
|
||||
|
@@ -1,22 +1,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params={}):
    """Get the downloader class that can handle the info dict.

    The protocol determined for info_dict is stored in the caller's dict
    under 'protocol'; the actual selection is then delegated to
    _get_suitable_downloader() on a shallow copy, so assignments that the
    selection makes to top-level keys do not reach the caller's info_dict.
    params is passed through as given (the same object, not a copy).
    """
    info_dict['protocol'] = determine_protocol(info_dict)
    info_copy = info_dict.copy()
    return _get_suitable_downloader(info_copy, params)
|
||||
|
||||
|
||||
# Some of these require get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .dash import DashSegmentsFD
|
||||
from .f4m import F4mFD
|
||||
from .hls import HlsFD
|
||||
from .http import HttpFD
|
||||
from .rtmp import RtmpFD
|
||||
from .dash import DashSegmentsFD
|
||||
from .rtsp import RtspFD
|
||||
from .ism import IsmFD
|
||||
from .niconico import NiconicoDmcFD
|
||||
from .external import (
|
||||
get_external_downloader,
|
||||
FFmpegFD,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
determine_protocol,
|
||||
)
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
'm3u8_native': HlsFD,
|
||||
@@ -26,13 +35,12 @@ PROTOCOL_MAP = {
|
||||
'f4m': F4mFD,
|
||||
'http_dash_segments': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
}
|
||||
|
||||
|
||||
def get_suitable_downloader(info_dict, params={}):
|
||||
def _get_suitable_downloader(info_dict, params={}):
|
||||
"""Get the downloader class that can handle the info dict."""
|
||||
protocol = determine_protocol(info_dict)
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||
# return FFmpegFD
|
||||
@@ -42,7 +50,11 @@ def get_suitable_downloader(info_dict, params={}):
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict):
|
||||
return ed
|
||||
# Avoid using unwanted args since external_downloader was rejected
|
||||
if params.get('external_downloader_args'):
|
||||
params['external_downloader_args'] = None
|
||||
|
||||
protocol = info_dict['protocol']
|
||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||
return FFmpegFD
|
||||
|
||||
|
@@ -22,6 +22,7 @@ from ..utils import (
|
||||
handle_youtubedl_headers,
|
||||
check_executable,
|
||||
is_outdated_version,
|
||||
process_communicate_or_kill,
|
||||
)
|
||||
|
||||
|
||||
@@ -104,7 +105,7 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
_, stderr = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
self.to_stderr(stderr.decode('utf-8', 'replace'))
|
||||
return p.returncode
|
||||
@@ -141,7 +142,7 @@ class CurlFD(ExternalFD):
|
||||
|
||||
# curl writes the progress to stderr so don't capture it.
|
||||
p = subprocess.Popen(cmd)
|
||||
p.communicate()
|
||||
process_communicate_or_kill(p)
|
||||
return p.returncode
|
||||
|
||||
|
||||
@@ -199,6 +200,64 @@ class Aria2cFD(ExternalFD):
|
||||
return cmd
|
||||
|
||||
|
||||
class Aria2pFD(ExternalFD):
    """Downloader that hands the transfer to an aria2c daemon through aria2p.

    aria2p is a command-line tool and Python library that talks to a running
    aria2c process over JSON-RPC, which makes it easy to read back download
    progress.

    To use it, install aria2c and aria2p (aria2p can be installed with pip),
    then run aria2c in the background with the --enable-rpc option.
    """
    # aria2p is optional: when the import succeeds the module is bound as a
    # class attribute, and the private flag records whether it succeeded.
    try:
        import aria2p
    except ImportError:
        __avail = False
    else:
        __avail = True

    @classmethod
    def available(cls):
        """Return True if the aria2p module could be imported."""
        return cls.__avail

    def _call_downloader(self, tmpfilename, info_dict):
        """Queue info_dict['url'] on the aria2c daemon and poll until it stops.

        While the download is 'active' or 'waiting', a progress status dict
        is passed to self._hook_progress() and the loop sleeps for half a
        second.  Returns a false value when the final status is 'complete'
        and a true value otherwise.
        """
        client = self.aria2p.Client(
            host='http://localhost',
            port=6800,
            secret='')
        api = self.aria2p.API(client)

        options = {
            'min-split-size': '1M',
            'max-connection-per-server': 4,
            'auto-file-renaming': 'false',
            'dir': os.path.dirname(tmpfilename) or os.path.abspath('.'),
            'out': os.path.basename(tmpfilename),
            'header': [
                '{0}: {1}'.format(name, value)
                for name, value in info_dict['http_headers'].items()],
        }

        download = api.add_uris([info_dict['url']], options)
        progress = {'status': 'downloading', 'tmpfilename': tmpfilename}
        start_time = time.time()
        while download.status in ('active', 'waiting'):
            # Re-fetch the download object to get fresh counters.
            download = api.get_download(download.gid)
            progress['downloaded_bytes'] = download.completed_length
            progress['total_bytes'] = download.total_length
            progress['elapsed'] = time.time() - start_time
            progress['eta'] = download.eta.total_seconds()
            progress['speed'] = download.download_speed
            self._hook_progress(progress)
            time.sleep(.5)
        return download.status != 'complete'
|
||||
|
||||
|
||||
class HttpieFD(ExternalFD):
|
||||
@classmethod
|
||||
def available(cls):
|
||||
@@ -336,14 +395,17 @@ class FFmpegFD(ExternalFD):
|
||||
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
|
||||
try:
|
||||
retval = proc.wait()
|
||||
except KeyboardInterrupt:
|
||||
# subprocces.run would send the SIGKILL signal to ffmpeg and the
|
||||
except BaseException as e:
|
||||
# subprocess.run would send the SIGKILL signal to ffmpeg and the
|
||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||
# produces a file that is playable (this is mostly useful for live
|
||||
# streams). Note that Windows is not affected and produces playable
|
||||
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
|
||||
if sys.platform != 'win32':
|
||||
proc.communicate(b'q')
|
||||
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
|
||||
process_communicate_or_kill(proc, b'q')
|
||||
else:
|
||||
proc.kill()
|
||||
proc.wait()
|
||||
raise
|
||||
return retval
|
||||
|
||||
|
66
youtube_dl/downloader/niconico.py
Normal file
66
youtube_dl/downloader/niconico.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
try:
|
||||
import threading
|
||||
except ImportError:
|
||||
threading = None
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..downloader import get_suitable_downloader
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
from ..utils import sanitized_Request
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
    """ Downloading niconico douga from DMC with heartbeat """

    FD_NAME = 'niconico_dmc'

    def real_download(self, filename, info_dict):
        """Download info_dict to filename while sending periodic heartbeats.

        The heartbeat details come from NiconicoIE._get_heartbeat_info();
        the download itself is delegated to whichever downloader
        get_suitable_downloader() selects for the updated info_dict.  When
        the threading module is unavailable the download runs without
        heartbeats.

        Returns whatever the delegated downloader's real_download() returns.
        """
        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)

        ie = NiconicoIE(self.ydl)
        info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)

        # The delegated downloader reports to the same progress hooks.
        fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)

        if not threading:
            self.to_screen('[%s] Threading for Heartbeat not available' % self.FD_NAME)
            return fd.real_download(filename, info_dict)

        success = download_complete = False
        # One-element list so the nested heartbeat() can replace the timer
        # without rebinding a name in this scope.
        timer = [None]
        heartbeat_lock = threading.Lock()
        heartbeat_url = heartbeat_info_dict['url']
        heartbeat_data = heartbeat_info_dict['data'].encode()
        heartbeat_interval = heartbeat_info_dict.get('interval', 30)

        request = sanitized_Request(heartbeat_url, heartbeat_data)

        def heartbeat():
            # A failed heartbeat is reported but does not stop the schedule.
            try:
                self.ydl.urlopen(request).read()
            except Exception:
                self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)

            # Re-arm under the lock so no new timer is started once the
            # download has been marked complete.
            with heartbeat_lock:
                if not download_complete:
                    timer[0] = threading.Timer(heartbeat_interval, heartbeat)
                    timer[0].start()

        heartbeat_info_dict['ping']()
        self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
        try:
            heartbeat()
            if type(fd).__name__ == 'HlsFD':
                info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
            success = fd.real_download(filename, info_dict)
        finally:
            with heartbeat_lock:
                # The first heartbeat() may be interrupted (for example by
                # KeyboardInterrupt) before any timer exists; cancelling
                # None would raise and mask the original exception.
                if timer[0] is not None:
                    timer[0].cancel()
                download_complete = True
        return success
|
@@ -89,11 +89,13 @@ class RtmpFD(FileDownloader):
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen('[rtmpdump] ' + line)
|
||||
finally:
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.wait()
|
||||
except BaseException: # Including KeyboardInterrupt
|
||||
proc.kill()
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
raise
|
||||
|
||||
url = info_dict['url']
|
||||
player_url = info_dict.get('player_url')
|
||||
|
@@ -31,30 +31,34 @@ from ..utils import (
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
IE_DESC = 'Anime Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'id': '9841',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
'series': 'Blue Exorcist - Kyôto Saga',
|
||||
'duration': 1467,
|
||||
'release_date': '20170106',
|
||||
'title': 'Fruits Basket - Episode 1',
|
||||
'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
|
||||
'series': 'Fruits Basket',
|
||||
'duration': 1437,
|
||||
'release_date': '20190405',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'season_number': 2,
|
||||
'episode': 'Début des hostilités',
|
||||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
}
|
||||
}
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
@@ -82,14 +86,14 @@ class ADNIE(InfoExtractor):
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
subtitle_location, video_id, 'Downloading subtitles data',
|
||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
fatal=False, headers={'Origin': 'https://' + self._BASE})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '7fac1178830cfe0c')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -138,9 +142,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if not username:
|
||||
return
|
||||
try:
|
||||
url = self._API_BASE_URL + 'authentication/login'
|
||||
access_token = (self._download_json(
|
||||
self._API_BASE_URL + 'authentication/login', None,
|
||||
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||
url, None, 'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'password': password,
|
||||
'rememberMe': False,
|
||||
@@ -153,7 +157,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
self._webpage_read_content(e.cause, url, username),
|
||||
username, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
@@ -211,7 +216,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
error = self._parse_json(
|
||||
self._webpage_read_content(e.cause, links_url, video_id),
|
||||
video_id, fatal=False) or {}
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
@@ -20,8 +20,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/'''
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
_THEPLATFORM_KEY = '43jXaGRQud'
|
||||
_THEPLATFORM_SECRET = 'S10BPXHMlb'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
|
@@ -18,7 +18,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'id': '2800002704436634',
|
||||
'ext': 'mp4',
|
||||
'title': 'CASIMA7.22',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'uploader': 'CASIMA Official Store',
|
||||
'timestamp': 1500717600,
|
||||
'upload_date': '20170722',
|
||||
|
89
youtube_dl/extractor/alsace20tv.py
Normal file
89
youtube_dl/extractor/alsace20tv.py
Normal file
@@ -0,0 +1,89 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class Alsace20TVIE(InfoExtractor):
    # Extractor for video pages on alsace20.tv; the video key is the last
    # dash-separated token of the URL path.
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/(?:[\w-]+/)+[\w-]+-(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/VOD/Actu/JT/Votre-JT-jeudi-3-fevrier-lyNHCXpYJh.html',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'description': 'md5:fc0bc4a0692d3d2dba4524053de4c7b7',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'duration': 1073,
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _extract_video(self, video_id, url=None):
        """Build the info dict for video_id.

        Metadata and format URLs come from the site's player JSON endpoint.
        When url is given, the page is also downloaded (non-fatally) to
        fill in description, duration and a fallback thumbnail.

        Raises KeyError if the player JSON has no 'titre' entry.
        """
        info = self._download_json(
            'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
            video_id) or {}
        title = info['titre']

        formats = []
        for res, fmt_url in (info.get('files') or {}).items():
            # URLs containing '/smil:_' are parsed as SMIL, all others as
            # DASH manifests.
            formats.extend(
                self._extract_smil_formats(fmt_url, video_id, fatal=False)
                if '/smil:_' in fmt_url
                else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
        self._sort_formats(formats)

        webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
        thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
        # thumbnail may be None; search an empty string instead so the
        # regex lookup yields its default rather than raising TypeError.
        upload_date = self._search_regex(r'/(\d{6})_', thumbnail or '', 'upload_date', default=None)
        # The six digits in the thumbnail path are read as YYMMDD.
        upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': clean_html(get_element_by_class('wysiwyg', webpage)),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
            # Duration is optional metadata: a page without the property
            # must not abort the extraction.
            'duration': int_or_none(self._og_search_property('video:duration', webpage, default=None)),
            'view_count': int_or_none(info.get('nb_vues')),
        }

    def _real_extract(self, url):
        """Extract the video whose id is embedded in url."""
        video_id = self._match_id(url)
        return self._extract_video(video_id, url)
|
||||
|
||||
|
||||
class Alsace20TVEmbedIE(Alsace20TVIE):
    # Variant for /emb/<id> URLs: extraction is done from the video id
    # alone, with no page URL passed to _extract_video().
    _VALID_URL = r'https?://(?:www\.)?alsace20\.tv/emb/(?P<id>[\w]+)'
    _TESTS = [{
        'url': 'https://www.alsace20.tv/emb/lyNHCXpYJh',
        # 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb',
        'info_dict': {
            'id': 'lyNHCXpYJh',
            'ext': 'mp4',
            'title': 'Votre JT du jeudi 3 février',
            'upload_date': '20220203',
            'thumbnail': r're:https?://.+\.jpg',
            'view_count': int,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]

    def _real_extract(self, url):
        """Extract the video identified by the embed URL's id."""
        return self._extract_video(self._match_id(url))
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
@@ -23,15 +23,20 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'id': '5b400b9ee338f922cb06450c',
|
||||
'title': 'Japanese Suppers',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'weeknight-japanese-suppers',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523318400,
|
||||
'upload_date': '20180410',
|
||||
'release_date': '20180410',
|
||||
'timestamp': 1523304000,
|
||||
'upload_date': '20180409',
|
||||
'release_date': '20180409',
|
||||
'series': "America's Test Kitchen",
|
||||
'season': 'Season 18',
|
||||
'season_number': 18,
|
||||
'episode': 'Japanese Suppers',
|
||||
'episode_number': 15,
|
||||
'duration': 1376,
|
||||
'thumbnail': r're:^https?://',
|
||||
'average_rating': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -44,15 +49,20 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'id': '5fbe8c61bda2010001c6763b',
|
||||
'title': 'Simple Chicken Dinner',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4',
|
||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1610755200,
|
||||
'upload_date': '20210116',
|
||||
'release_date': '20210116',
|
||||
'timestamp': 1610737200,
|
||||
'upload_date': '20210115',
|
||||
'release_date': '20210115',
|
||||
'series': "America's Test Kitchen",
|
||||
'season': 'Season 21',
|
||||
'season_number': 21,
|
||||
'episode': 'Simple Chicken Dinner',
|
||||
'episode_number': 3,
|
||||
'duration': 1397,
|
||||
'thumbnail': r're:^https?://',
|
||||
'view_count': int,
|
||||
'average_rating': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -60,6 +70,12 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||
'only_matching': True,
|
||||
@@ -94,7 +110,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
|
||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|(?P<cooks>cooks(?:country|illustrated)))\.com(?:(?:/(?P<show2>cooks(?:country|illustrated)))?(?:/?$|(?<!ated)(?<!ated\.com)/episodes/browse/season_(?P<season>\d+)))'
|
||||
_TESTS = [{
|
||||
# ATK Season
|
||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||
@@ -105,48 +121,93 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# Cooks Country Season
|
||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12',
|
||||
'info_dict': {
|
||||
'id': 'season_12',
|
||||
'title': 'Season 12',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# America's Test Kitchen Series
|
||||
'url': 'https://www.americastestkitchen.com/',
|
||||
'info_dict': {
|
||||
'id': 'americastestkitchen',
|
||||
'title': 'America\'s Test Kitchen',
|
||||
},
|
||||
'playlist_count': 558,
|
||||
}, {
|
||||
# Cooks Country Series
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry',
|
||||
'info_dict': {
|
||||
'id': 'cookscountry',
|
||||
'title': 'Cook\'s Country',
|
||||
},
|
||||
'playlist_count': 199,
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cookscountry/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/cooksillustrated/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cooksillustrated.com',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_name, season_number = re.match(self._VALID_URL, url).groups()
|
||||
season_number = int(season_number)
|
||||
match = re.match(self._VALID_URL, url).groupdict()
|
||||
show = match.get('show2')
|
||||
show_path = ('/' + show) if show else ''
|
||||
show = show or match['show']
|
||||
season_number = int_or_none(match.get('season'))
|
||||
|
||||
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
||||
slug, title = {
|
||||
'americastestkitchen': ('atk', 'America\'s Test Kitchen'),
|
||||
'cookscountry': ('cco', 'Cook\'s Country'),
|
||||
'cooksillustrated': ('cio', 'Cook\'s Illustrated'),
|
||||
}[show]
|
||||
|
||||
season = 'Season %d' % season_number
|
||||
facet_filters = [
|
||||
'search_document_klass:episode',
|
||||
'search_show_slug:' + slug,
|
||||
]
|
||||
|
||||
if season_number:
|
||||
playlist_id = 'season_%d' % season_number
|
||||
playlist_title = 'Season %d' % season_number
|
||||
facet_filters.append('search_season_list:' + playlist_title)
|
||||
else:
|
||||
playlist_id = show
|
||||
playlist_title = title
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
season, headers={
|
||||
'Origin': 'https://www.%s.com' % show_name,
|
||||
playlist_id, headers={
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps([
|
||||
'search_season_list:' + season,
|
||||
'search_document_klass:episode',
|
||||
'search_show_slug:' + slug,
|
||||
]),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
||||
'facetFilters': json.dumps(facet_filters),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode in (season_search.get('hits') or []):
|
||||
search_url = episode.get('search_url')
|
||||
search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode'
|
||||
if not search_url:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
||||
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
||||
'url': 'https://www.americastestkitchen.com%s%s' % (show_path, search_url),
|
||||
'id': try_get(episode, lambda e: e['objectID'].rsplit('_', 1)[-1]),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
@@ -156,4 +217,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), 'season_%d' % season_number, season)
|
||||
entries(), playlist_id, playlist_title)
|
||||
|
@@ -3,8 +3,11 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
@@ -14,16 +17,17 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
||||
'info_dict': {
|
||||
'id': '1000482637777',
|
||||
'ext': 'mp3',
|
||||
'title': '207 - Whitney Webb Returns',
|
||||
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
||||
'upload_date': '20200705',
|
||||
'timestamp': 1593921600,
|
||||
'duration': 6425,
|
||||
'timestamp': 1593932400,
|
||||
'duration': 6454,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
@@ -39,19 +43,40 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id)
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode = ember_data['data']['attributes']
|
||||
episode_data = {}
|
||||
ember_data = {}
|
||||
# new page type 2021-11
|
||||
amp_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
||||
amp_data = try_get(amp_data,
|
||||
lambda a: self._parse_json(
|
||||
next(a[x] for x in iter(a) if episode_id in x),
|
||||
episode_id),
|
||||
dict) or {}
|
||||
amp_data = amp_data.get('d') or []
|
||||
episode_data = try_get(
|
||||
amp_data,
|
||||
lambda a: next(x for x in a
|
||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
||||
dict)
|
||||
if not episode_data:
|
||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
||||
ember_data = self._parse_json(self._search_regex(
|
||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||
webpage, 'ember data'), episode_id) or {}
|
||||
ember_data = ember_data.get(episode_id) or ember_data
|
||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
||||
episode = episode_data['attributes']
|
||||
description = episode.get('description') or {}
|
||||
|
||||
series = None
|
||||
for inc in (ember_data.get('included') or []):
|
||||
for inc in (amp_data or ember_data.get('included') or []):
|
||||
if inc.get('type') == 'media/podcast':
|
||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
||||
|
||||
return {
|
||||
info = [{
|
||||
'id': episode_id,
|
||||
'title': episode['name'],
|
||||
'url': clean_podcast_url(episode['assetUrl']),
|
||||
@@ -59,4 +84,10 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||
'series': series,
|
||||
}
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}]
|
||||
self._sort_formats(info)
|
||||
info = info[0]
|
||||
codecs = parse_codecs(info.get('ext', 'mp3'))
|
||||
info.update(codecs)
|
||||
return info
|
||||
|
@@ -332,9 +332,24 @@ class ARDIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
_SUB_FORMATS = (
|
||||
('./dataTimedText', 'ttml'),
|
||||
('./dataTimedTextNoOffset', 'ttml'),
|
||||
('./dataTimedTextVtt', 'vtt'),
|
||||
)
|
||||
|
||||
subtitles = {}
|
||||
for subsel, subext in _SUB_FORMATS:
|
||||
for node in video_node.findall(subsel):
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': node.attrib['url'],
|
||||
'ext': subext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': xpath_text(video_node, './videoId', default=display_id),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'display_id': display_id,
|
||||
'title': video_node.find('./title').text,
|
||||
'duration': parse_duration(video_node.find('./duration').text),
|
||||
|
@@ -342,4 +342,4 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
|
||||
return merge_dicts(
|
||||
self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title),
|
||||
{'description': self._og_search_description(webpage, default=None)})
|
||||
{'description': self._og_search_description(webpage, default=None)})
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class AudiomackIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack'
|
||||
_TESTS = [
|
||||
# hosted on audiomack
|
||||
@@ -29,25 +29,27 @@ class AudiomackIE(InfoExtractor):
|
||||
}
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
# Needs new test URL.
|
||||
{
|
||||
'add_ie': ['Soundcloud'],
|
||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||
'info_dict': {
|
||||
'id': '258901379',
|
||||
'ext': 'mp3',
|
||||
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||
'uploader': 'ILOVEMAKONNEN',
|
||||
'upload_date': '20160414',
|
||||
}
|
||||
'only_matching': True,
|
||||
# 'info_dict': {
|
||||
# 'id': '258901379',
|
||||
# 'ext': 'mp3',
|
||||
# 'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||
# 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||
# 'uploader': 'ILOVEMAKONNEN',
|
||||
# 'upload_date': '20160414',
|
||||
# }
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# URLs end with [uploader name]/song/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
album_url_tag = self._match_id(url).replace('/song/', '/')
|
||||
|
||||
# Request the extended version of the api for extra fields like artist and title
|
||||
api_response = self._download_json(
|
||||
@@ -73,13 +75,13 @@ class AudiomackIE(InfoExtractor):
|
||||
|
||||
|
||||
class AudiomackAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)'
|
||||
IE_NAME = 'audiomack:album'
|
||||
_TESTS = [
|
||||
# Standard album playlist
|
||||
{
|
||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||
'playlist_count': 15,
|
||||
'playlist_count': 11,
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
@@ -95,24 +97,24 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
||||
'id': '837577',
|
||||
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
||||
'id': '837580',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
'playliststart': 9,
|
||||
'playlistend': 9,
|
||||
'playliststart': 2,
|
||||
'playlistend': 2,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
# URLs end with [uploader name]/[uploader title]
|
||||
# URLs end with [uploader name]/album/[uploader title]
|
||||
# this title is whatever the user types in, and is rarely
|
||||
# the proper song title. Real metadata is in the api response
|
||||
album_url_tag = self._match_id(url)
|
||||
album_url_tag = self._match_id(url).replace('/album/', '/')
|
||||
result = {'_type': 'playlist', 'entries': []}
|
||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||
@@ -134,7 +136,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = api_response[apikey]
|
||||
result[resultkey] = compat_str(api_response[apikey])
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
|
@@ -12,6 +12,7 @@ from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -39,7 +40,7 @@ from ..utils import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -395,9 +396,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
href, programme_id, mpd_id=format_id, fatal=False))
|
||||
elif transfer_format == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
# TODO: let expected_status be passed into _extract_xxx_formats() instead
|
||||
try:
|
||||
fmts = self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
|
||||
and e.exc_info[1].code in (403, 404)):
|
||||
raise
|
||||
fmts = []
|
||||
formats.extend(fmts)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
@@ -775,21 +784,33 @@ class BBCIE(BBCCoUkIE):
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# bbcthreeConfig
|
||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||
'info_dict': {
|
||||
'id': 'p06556y7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||
'title': 'Things Not To Say to people that live on council estates',
|
||||
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
|
||||
'duration': 360,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
@@ -1162,9 +1183,16 @@ class BBCIE(BBCCoUkIE):
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
if initial_data is None:
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
||||
'preload state', default={})
|
||||
else:
|
||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
@@ -1205,7 +1233,10 @@ class BBCIE(BBCCoUkIE):
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') != 'media':
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
|
59
youtube_dl/extractor/bigo.py
Normal file
59
youtube_dl/extractor/bigo.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, urlencode_postdata
|
||||
|
||||
|
||||
# Extractor for live rooms on bigo.tv. The room is looked up through the
# site's studio-info endpoint and only rooms that are currently live
# produce a result.
class BigoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bigo\.tv/(?:[a-z]{2,}/)?(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.bigo.tv/ja/221338632',
        'info_dict': {
            'id': '6576287577575737440',
            'title': '土よ〜💁♂️ 休憩室/REST room',
            'thumbnail': r're:https?://.+',
            'uploader': '✨Shin💫',
            'uploader_id': '221338632',
            'is_live': True,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://www.bigo.tv/th/Tarlerm1304',
        'only_matching': True,
    }, {
        'url': 'https://bigo.tv/115976881',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the live room addressed by `url`.

        Raises ExtractorError when the API reply is not a JSON object, when
        the API reports an error code, when the user is not live, or when
        the reply carries no HLS stream URL.
        """
        user_id = self._match_id(url)

        # The id taken from the URL is sent as POSTed form data
        info_raw = self._download_json(
            'https://bigo.tv/studio/getInternalStudioInfo',
            user_id, data=urlencode_postdata({'siteId': user_id}))

        if not isinstance(info_raw, dict):
            raise ExtractorError('Received invalid JSON data')
        # A truthy `code` is treated as an API-level failure; relay its message
        if info_raw.get('code'):
            raise ExtractorError(
                'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
        info = info_raw.get('data') or {}

        if not info.get('alive'):
            raise ExtractorError('This user is offline.', expected=True)

        # Fail here with a specific message instead of returning a format
        # entry whose 'url' is None
        hls_url = info.get('hls_src')
        if not hls_url:
            raise ExtractorError('Unable to find HLS stream URL')

        return {
            'id': info.get('roomId') or user_id,
            'title': info.get('roomTopic') or info.get('nick_name') or user_id,
            'formats': [{
                'url': hls_url,
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
            'thumbnail': info.get('snapshot'),
            'uploader': info.get('nick_name'),
            'uploader_id': user_id,
            'is_live': True,
        }
|
@@ -369,6 +369,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
'filesize': int_or_none(play_data.get('size')),
|
||||
}]
|
||||
|
||||
for a_format in formats:
|
||||
a_format.setdefault('http_headers', {}).update({
|
||||
'Referer': url,
|
||||
})
|
||||
|
||||
song = self._call_api('song/info', au_id)
|
||||
title = song['title']
|
||||
statistic = song.get('statistic') or {}
|
||||
|
173
youtube_dl/extractor/blerp.py
Normal file
173
youtube_dl/extractor/blerp.py
Normal file
@@ -0,0 +1,173 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from ..utils import (
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
# Extractor for blerp.com sound bites. All metadata, including the MP3 URL,
# comes from a single GraphQL request against api.blerp.com.
class BlerpIE(InfoExtractor):
    IE_NAME = 'blerp'
    _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a',
        'info_dict': {
            'id': '6320fe8745636cb4dd677a5a',
            'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016',
            'uploader': 'luminousaj',
            'uploader_id': '5fb81e51aa66ae000c395478',
            'ext': 'mp3',
            'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
        }
    }, {
        'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
        'info_dict': {
            'id': '5bc94ef4796001000498429f',
            'title': 'Yee',
            'uploader': '179617322678353920',
            'uploader_id': '5ba99cf71386730004552c42',
            'ext': 'mp3',
            'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
        }
    }]

    # Operation name and query document sent to the GraphQL endpoint;
    # the name must match the operation declared in the query text.
    _GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
    _GRAPHQL_QUERY = (
        '''query webBitePageGetBite($_id: MongoID!) {
            web {
                biteById(_id: $_id) {
                    ...bitePageFrag
                    __typename
                }
                __typename
            }
        }

        fragment bitePageFrag on Bite {
            _id
            title
            userKeywords
            keywords
            color
            visibility
            isPremium
            owned
            price
            extraReview
            isAudioExists
            image {
                filename
                original {
                    url
                    __typename
                }
                __typename
            }
            userReactions {
                _id
                reactions
                createdAt
                __typename
            }
            topReactions
            totalSaveCount
            saved
            blerpLibraryType
            license
            licenseMetaData
            playCount
            totalShareCount
            totalFavoriteCount
            totalAddedToBoardCount
            userCategory
            userAudioQuality
            audioCreationState
            transcription
            userTranscription
            description
            createdAt
            updatedAt
            author
            listingType
            ownerObject {
                _id
                username
                profileImage {
                    filename
                    original {
                        url
                        __typename
                    }
                    __typename
                }
                __typename
            }
            transcription
            favorited
            visibility
            isCurated
            sourceUrl
            audienceRating
            strictAudienceRating
            ownerId
            reportObject {
                reportedContentStatus
                __typename
            }
            giphy {
                mp4
                gif
                __typename
            }
            audio {
                filename
                original {
                    url
                    __typename
                }
                mp3 {
                    url
                    __typename
                }
                __typename
            }
            __typename
        }

        ''')

    def _real_extract(self, url):
        """Return the info dict for the sound bite addressed by `url`.

        The id, title and MP3 URL are read with plain subscripting, so a
        reply that lacks any of them raises instead of yielding a partial
        result. Uploader fields and tags are optional.
        """
        audio_id = self._match_id(url)

        data = {
            'operationName': self._GRAPHQL_OPERATIONNAME,
            'query': self._GRAPHQL_QUERY,
            'variables': {
                '_id': audio_id
            }
        }

        headers = {
            'Content-Type': 'application/json'
        }

        # The request body is the JSON-encoded GraphQL payload
        json_result = self._download_json('https://api.blerp.com/graphql',
                                          audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)

        bite_json = json_result['data']['web']['biteById']

        keywords = traverse_obj(bite_json, 'userKeywords', expected_type=list) or []
        # Materialise the list *before* applying `or None`: on Python 2
        # `filter()` returns a list, so an empty result inside `list(... or None)`
        # became `list(None)` (TypeError); on Python 3 the filter object is
        # always truthy, so the fallback could never apply.
        tags = list(filter(None, map(strip_or_none, keywords))) or None

        info_dict = {
            'id': bite_json['_id'],
            'url': bite_json['audio']['mp3']['url'],
            'title': bite_json['title'],
            'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
            'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
            'ext': 'mp3',
            'tags': tags,
        }

        return info_dict
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -12,13 +13,28 @@ from ..utils import (
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.(?:com|net))/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://de.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cn.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://de.bongacams.net/claireashton',
|
||||
'info_dict': {
|
||||
'id': 'claireashton',
|
||||
'ext': 'mp4',
|
||||
'title': r're:ClaireAshton \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
|
||||
'age_limit': 18,
|
||||
'uploader_id': 'ClaireAshton',
|
||||
'uploader': 'ClaireAshton',
|
||||
'like_count': int,
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
74
youtube_dl/extractor/callin.py
Normal file
74
youtube_dl/extractor/callin.py
Normal file
@@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
# Extractor for callin.com episode pages. Episode metadata is read from the
# JSON embedded in the page's __NEXT_DATA__ script element.
class CallinIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?:[^/#?-]+-)*(?P<id>[^/#?-]+)'
    _TESTS = [{
        'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW',
        'md5': '14ede27ee2c957b7e4db93140fc0745c',
        'info_dict': {
            'id': 'PrumRdSQJW',
            'ext': 'mp4',
            'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink',
            'description': 'Or, why the government doesn’t like SpaceX',
            'channel': 'The Pull Request',
            'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa',
        }
    }, {
        'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA',
        'md5': '16f704ddbf82a27e3930533b12062f07',
        'info_dict': {
            'id': 'lzxMidUnjA',
            'ext': 'mp4',
            'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?',
            'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.',
            'channel': 'The DEBRIEF With Briahna Joy Gray',
            'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm',
        }
    }]

    def _search_nextjs_data(self, webpage, video_id, transform_source=None, fatal=True, **kw):
        """Locate and parse the __NEXT_DATA__ JSON embedded in `webpage`.

        `transform_source` and `fatal` are forwarded to `_parse_json`;
        `fatal` and any extra keyword arguments go to `_search_regex`.
        Returns the parsed data, or None when the regex search produced
        no string to parse.
        """
        json_string = self._search_regex(
            r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
            webpage, 'next.js data', fatal=fatal, **kw)
        if json_string is None:
            # Nothing to parse (e.g. a non-fatal search that found no match);
            # do not hand None to the JSON parser
            return None
        return self._parse_json(
            json_string, video_id, transform_source=transform_source, fatal=fatal)

    def _real_extract(self, url):
        """Return the info dict for the episode page at `url`.

        Raises ExtractorError when the page carries no episode object.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        next_data = self._search_nextjs_data(webpage, video_id)
        episode = traverse_obj(next_data, ('props', 'pageProps', 'episode'), expected_type=dict)
        if not episode:
            raise ExtractorError('Failed to find episode data')

        # Prefer the embedded metadata, fall back to the OpenGraph tags
        title = episode.get('title') or self._og_search_title(webpage)
        description = episode.get('description') or self._og_search_description(webpage)

        formats = []
        m3u8_url = episode.get('m3u8')
        # Only request the manifest when the episode actually names one;
        # an empty list is left for _sort_formats to deal with
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4',
                entry_protocol='m3u8_native', fatal=False))
        self._sort_formats(formats)

        channel = try_get(episode, lambda x: x['show']['title'], compat_str)
        channel_url = try_get(episode, lambda x: x['show']['linkObj']['resourceUrl'], compat_str)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'channel': channel,
            'channel_url': channel_url,
        }
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
@@ -20,32 +19,11 @@ class CamModelsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, user_id, headers=self.geo_verification_headers())
|
||||
|
||||
manifest_root = self._html_search_regex(
|
||||
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
|
||||
|
||||
if not manifest_root:
|
||||
ERRORS = (
|
||||
("I'm offline, but let's stay connected", 'This user is currently offline'),
|
||||
('in a private show', 'This user is in a private show'),
|
||||
('is currently performing LIVE', 'This model is currently performing live'),
|
||||
)
|
||||
for pattern, message in ERRORS:
|
||||
if pattern in webpage:
|
||||
error = message
|
||||
expected = True
|
||||
break
|
||||
else:
|
||||
error = 'Unable to find manifest URL root'
|
||||
expected = False
|
||||
raise ExtractorError(error, expected=expected)
|
||||
|
||||
manifest = self._download_json(
|
||||
'%s%s.json' % (manifest_root, user_id), user_id)
|
||||
'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id)
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for format_id, format_dict in manifest['formats'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
@@ -85,6 +63,13 @@ class CamModelsIE(InfoExtractor):
|
||||
'preference': -1,
|
||||
})
|
||||
else:
|
||||
if format_id == 'jpeg':
|
||||
thumbnails.append({
|
||||
'url': f['url'],
|
||||
'width': f['width'],
|
||||
'height': f['height'],
|
||||
'format_id': f['format_id'],
|
||||
})
|
||||
continue
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
@@ -92,6 +77,7 @@ class CamModelsIE(InfoExtractor):
|
||||
return {
|
||||
'id': user_id,
|
||||
'title': self._live_title(user_id),
|
||||
'thumbnails': thumbnails,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'age_limit': 18
|
||||
|
@@ -12,35 +12,21 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
|
||||
'info_dict': {
|
||||
'id': '61924494877246241',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace: Život v Grónsku',
|
||||
'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 3350,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
|
||||
'info_dict': {
|
||||
'id': '61924494877028507',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hyde Park Civilizace: Bonus 01 - En',
|
||||
'title': 'Bonus 01 - En - Hyde Park Civilizace',
|
||||
'description': 'English Subtittles',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 81.3,
|
||||
@@ -51,31 +37,111 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# live stream
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||
'url': 'http://www.ceskatelevize.cz/zive/ct1/',
|
||||
'info_dict': {
|
||||
'id': 402,
|
||||
'id': '102',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'title': r'ČT1 - živé vysílání online',
|
||||
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to Czech Republic',
|
||||
}, {
|
||||
# another
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 402,
|
||||
'ext': 'mp4',
|
||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
# 'skip': 'Georestricted to Czech Republic',
|
||||
}, {
|
||||
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video with 18+ caution trailer
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||
'info_dict': {
|
||||
'id': '215562210900007-bogotart',
|
||||
'title': 'Bogotart - Queer',
|
||||
'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '61924494877311053',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bogotart - Queer (Varování 18+)',
|
||||
'duration': 11.9,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '61924494877068022',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bogotart - Queer (Queer)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 1558.3,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||
webpage, 'next.js data', **kw),
|
||||
video_id, **kw)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
if playlist_description:
|
||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
type_ = 'IDEC'
|
||||
if re.search(r'(^/porady|/zive)/', parsed_url.path):
|
||||
next_data = self._search_nextjs_data(webpage, playlist_id)
|
||||
if '/zive/' in parsed_url.path:
|
||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
|
||||
else:
|
||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
|
||||
if not idec:
|
||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
|
||||
if idec:
|
||||
type_ = 'bonus'
|
||||
if not idec:
|
||||
raise ExtractorError('Failed to find IDEC id')
|
||||
iframe_hash = self._download_webpage(
|
||||
'https://www.ceskatelevize.cz/v-api/iframe-hash/',
|
||||
playlist_id, note='Getting IFRAME hash')
|
||||
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
|
||||
webpage = self._download_webpage(
|
||||
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
|
||||
playlist_id, note='Downloading player', query=query)
|
||||
|
||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
self.raise_geo_restricted(NOT_AVAILABLE_STRING)
|
||||
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
|
||||
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
|
||||
|
||||
type_ = None
|
||||
episode_id = None
|
||||
@@ -100,7 +166,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
data = {
|
||||
'playlist[0][type]': type_,
|
||||
'playlist[0][id]': episode_id,
|
||||
'requestUrl': compat_urllib_parse_urlparse(url).path,
|
||||
'requestUrl': parsed_url.path,
|
||||
'requestSource': 'iVysilani',
|
||||
}
|
||||
|
||||
@@ -108,7 +174,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
@@ -130,9 +196,6 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
@@ -167,7 +230,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
entries[num]['formats'].extend(formats)
|
||||
continue
|
||||
|
||||
item_id = item.get('id') or item['assetId']
|
||||
item_id = str_or_none(item.get('id') or item['assetId'])
|
||||
title = item['title']
|
||||
|
||||
duration = float_or_none(item.get('duration'))
|
||||
@@ -181,8 +244,6 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
if playlist_len == 1:
|
||||
final_title = playlist_title or title
|
||||
if is_live:
|
||||
final_title = self._live_title(final_title)
|
||||
else:
|
||||
final_title = '%s (%s)' % (playlist_title, title)
|
||||
|
||||
@@ -200,6 +261,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
for e in entries:
|
||||
self._sort_formats(e['formats'])
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def _get_subtitles(self, episode_id, subs):
|
||||
@@ -236,54 +299,3 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
yield line
|
||||
|
||||
return '\r\n'.join(_fix_subtitle(subtitles))
|
||||
|
||||
|
||||
# Extractor for ceskatelevize.cz "porady" (programme) pages: it finds the
# embedded player URL in the page and delegates to CeskaTelevizeIE.
class CeskaTelevizePoradyIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
    _TESTS = [{
        # video with 18+ caution trailer
        'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
        'info_dict': {
            'id': '215562210900007-bogotart',
            'title': 'Queer: Bogotart',
            'description': 'Alternativní průvodce současným queer světem',
        },
        'playlist': [{
            'info_dict': {
                'id': '61924494876844842',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Varování 18+)',
                'duration': 10.2,
            },
        }, {
            'info_dict': {
                'id': '61924494877068022',
                'ext': 'mp4',
                'title': 'Queer: Bogotart (Queer)',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 1558.3,
            },
        }],
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # iframe embed
        'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the programme page to its player URL and hand it on."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Tried in order: a <span data-url=...> attribute, then an <iframe>
        # whose src points at the iFramePlayer endpoint
        player_url_patterns = (
            r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1',
        )
        player_url = unescapeHTML(self._search_regex(
            player_url_patterns, webpage, 'iframe player url', group='url'))

        # The player URL is passed on with autoStart=true added to its query
        data_url = update_url_query(player_url, query={'autoStart': 'true'})

        return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
|
||||
|
@@ -70,6 +70,7 @@ from ..utils import (
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
@@ -1086,7 +1087,7 @@ class InfoExtractor(object):
|
||||
# Helper functions for extracting OpenGraph info
|
||||
@staticmethod
|
||||
def _og_regexes(prop):
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
|
||||
property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
|
||||
% {'prop': re.escape(prop)})
|
||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||
@@ -2713,7 +2714,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
|
||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
@@ -2734,9 +2735,14 @@ class InfoExtractor(object):
|
||||
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
flat_pl = try_get(jwplayer_data, lambda x: x.get('playlist') or True)
|
||||
if flat_pl is None:
|
||||
# not even a dict
|
||||
return []
|
||||
|
||||
# JWPlayer backward compatibility: flattened playlists
|
||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||
if 'playlist' not in jwplayer_data:
|
||||
if flat_pl is True:
|
||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||
|
||||
entries = []
|
||||
@@ -2784,6 +2790,13 @@ class InfoExtractor(object):
|
||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
|
||||
'subtitles': subtitles,
|
||||
'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ...
|
||||
'genre': clean_html(video_data.get('genre')),
|
||||
'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
|
||||
'season_number': int_or_none(video_data.get('season')),
|
||||
'episode_number': int_or_none(video_data.get('episode')),
|
||||
'release_year': int_or_none(video_data.get('releasedate')),
|
||||
'age_limit': int_or_none(video_data.get('age_restriction')),
|
||||
}
|
||||
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
|
||||
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
|
||||
@@ -2792,7 +2805,9 @@ class InfoExtractor(object):
|
||||
'url': formats[0]['url'],
|
||||
})
|
||||
else:
|
||||
self._sort_formats(formats)
|
||||
# avoid exception in case of only sttls
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
entry['formats'] = formats
|
||||
entries.append(entry)
|
||||
if len(entries) == 1:
|
||||
@@ -2802,7 +2817,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
urls = []
|
||||
urls = set()
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
if not isinstance(source, dict):
|
||||
@@ -2811,14 +2826,14 @@ class InfoExtractor(object):
|
||||
base_url, self._proto_relative_url(source.get('file')))
|
||||
if not source_url or source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
urls.add(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=m3u8_id, fatal=False))
|
||||
elif source_type == 'dash' or ext == 'mpd':
|
||||
elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source_url, video_id, mpd_id=mpd_id, fatal=False))
|
||||
elif ext == 'smil':
|
||||
@@ -2833,20 +2848,23 @@ class InfoExtractor(object):
|
||||
'ext': ext,
|
||||
})
|
||||
else:
|
||||
format_id = str_or_none(source.get('label'))
|
||||
height = int_or_none(source.get('height'))
|
||||
if height is None:
|
||||
if height is None and format_id:
|
||||
# Often no height is provided but there is a label in
|
||||
# format like "1080p", "720p SD", or 1080.
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
|
||||
'height', default=None))
|
||||
height = parse_resolution(format_id).get('height')
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': height,
|
||||
'tbr': int_or_none(source.get('bitrate')),
|
||||
'tbr': int_or_none(source.get('bitrate'), scale=1000),
|
||||
'filesize': int_or_none(source.get('filesize')),
|
||||
'ext': ext,
|
||||
}
|
||||
if format_id:
|
||||
a_format['format_id'] = format_id
|
||||
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv'
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
|
148
youtube_dl/extractor/cpac.py
Normal file
148
youtube_dl/extractor/cpac.py
Normal file
@@ -0,0 +1,148 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
# compat_range
# On Python 2, rebind the module-level name `range` to the lazy `xrange`.
# On Python 3 looking up `xrange` raises NameError, which is swallowed so
# the builtin `range` stays in effect.
try:
    if not callable(xrange):
        raise TypeError('xrange is not callable')
    range = xrange
except (NameError, TypeError):
    pass
|
||||
|
||||
|
||||
class CPACIE(InfoExtractor):
    # Extractor for a single cpac.ca episode page. The English page is
    # /episode?id=<uuid>, the French one /l-episode?id=<uuid>.
    IE_NAME = 'cpac'
    _VALID_URL = r'https?://(?:www\.)?cpac\.ca/(?P<fr>l-)?episode\?id=(?P<id>[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12})'
    _TEST = {
        # 'url': 'http://www.cpac.ca/en/programs/primetime-politics/episodes/65490909',
        'url': 'https://www.cpac.ca/episode?id=fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
        'md5': 'e46ad699caafd7aa6024279f2614e8fa',
        'info_dict': {
            'id': 'fc7edcae-4660-47e1-ba61-5b7f29a9db0f',
            'ext': 'mp4',
            'upload_date': '20220215',
            'title': 'News Conference to Celebrate National Kindness Week – February 15, 2022',
            'description': 'md5:466a206abd21f3a6f776cdef290c23fb',
            'timestamp': 1644901200,
        },
        'params': {
            'format': 'bestvideo',
            'hls_prefer_native': True,
        },
    }

    def _real_extract(self, url):
        # Returns the info dict for one episode: id, formats, title,
        # description, timestamp, categories, thumbnail and is_live.
        video_id = self._match_id(url)
        # The language is taken from the page path, and selects both the
        # localized metadata keys and the preferred audio language.
        url_lang = 'fr' if '/l-episode?' in url else 'en'

        content = self._download_json(
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id,
            video_id)
        # Resolve page.details once, whether or not a video URL is present,
        # so that nothing below depends on names bound only inside the
        # `if video_url` branch.
        details = try_get(content, lambda x: x['page']['details'], dict) or {}
        video_url = try_get(details, lambda x: x['videoUrl'], compat_str)

        formats = []
        if video_url:
            formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4')
            for fmt in formats:
                # prefer language to match URL
                fmt_lang = fmt.get('language')
                if fmt_lang == url_lang:
                    fmt['language_preference'] = 10
                elif not fmt_lang:
                    fmt['language_preference'] = -1
                else:
                    fmt['language_preference'] = -10

        self._sort_formats(formats)

        # The title is required, so it stays a hard lookup (KeyError if the
        # API omits it), as before.
        title = str_or_none(details['title_%s_t' % (url_lang, )])
        # The category is optional metadata: a missing key must not abort
        # the extraction.
        category = str_or_none(details.get('category_%s_t' % (url_lang, )))
        v_type = details.get('type')

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': str_or_none(details.get('description_%s_t' % (url_lang, ))),
            'timestamp': unified_timestamp(details.get('liveDateTime')),
            # 'categories' (a list) is the key used for this field elsewhere
            # in the code base, e.g. by FifaIE; the previous 'category' key
            # did not match it.
            'categories': [category] if category else None,
            'thumbnail': urljoin(url, str_or_none(details.get('image_%s_s' % (url_lang, )))),
            # None (unknown) when the API gives no type at all
            'is_live': (v_type == 'live') if v_type is not None else None,
        }
|
||||
|
||||
|
||||
class CPACPlaylistIE(InfoExtractor):
    # Extractor for cpac.ca programme pages (/program, /emission) and search
    # result pages (/search, /rechercher); yields one URL entry per episode.
    IE_NAME = 'cpac:playlist'
    _VALID_URL = r'(?i)https?://(?:www\.)?cpac\.ca/(?:program|search|(?P<fr>emission|rechercher))\?(?:[^&]+&)*?(?P<id>(?:id=\d+|programId=\d+|key=[^&]+))'

    _TESTS = [{
        'url': 'https://www.cpac.ca/program?id=6',
        'info_dict': {
            'id': 'id=6',
            'title': 'Headline Politics',
            'description': 'Watch CPAC’s signature long-form coverage of the day’s pressing political events as they unfold.',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.cpac.ca/search?key=hudson&type=all&order=desc',
        'info_dict': {
            'id': 'key=hudson',
            'title': 'hudson',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://www.cpac.ca/search?programId=50',
        'info_dict': {
            'id': 'programId=50',
            'title': '50',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://www.cpac.ca/emission?id=6',
        'only_matching': True,
    }, {
        'url': 'https://www.cpac.ca/rechercher?key=hudson&type=all&order=desc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The matched "id" is the whole `name=value` query item; it is
        # appended verbatim to the API URL and also used as the playlist id.
        video_id = self._match_id(url)
        url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en'
        pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult')
        api_url = (
            'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s'
            % (pl_type, video_id, ))

        # The first page is mandatory and is kept separately: it supplies the
        # page count and the playlist metadata. Continuation pages are
        # fetched with fatal=False, so reading the metadata from "the last
        # page fetched" would lose it whenever that request fails.
        first_page = self._download_json(api_url, video_id)
        total_pages = int_or_none(try_get(first_page, lambda x: x['page'][list_type]['totalPages']), default=1)

        entries = []
        content = first_page
        for page in range(1, total_pages + 1):
            if page > 1:
                content = self._download_json(
                    update_url_query(api_url, {'page': '%d' % (page, ), }),
                    video_id,
                    note='Downloading continuation - %d' % (page, ),
                    fatal=False)

            # try_get() tolerates a failed (falsy) continuation download
            for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []:
                episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )]))
                if episode_url:
                    entries.append(episode_url)

        return self.playlist_result(
            (self.url_result(entry) for entry in entries),
            playlist_id=video_id,
            # search pages have no programme block: fall back to the value
            # part of the `name=value` id
            playlist_title=try_get(first_page, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1],
            playlist_description=try_get(first_page, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]),
        )
|
@@ -51,6 +51,10 @@ from .anvato import AnvatoIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aliexpress import AliExpressLiveIE
|
||||
from .alsace20tv import (
|
||||
Alsace20TVIE,
|
||||
Alsace20TVEmbedIE,
|
||||
)
|
||||
from .apa import APAIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
@@ -71,6 +75,7 @@ from .arte import (
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
@@ -114,6 +119,7 @@ from .bfmtv import (
|
||||
)
|
||||
from .bibeltv import BibelTVIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bigo import BigoIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
@@ -132,6 +138,7 @@ from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
BleacherReportCMSIE,
|
||||
)
|
||||
from .blerp import BlerpIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
@@ -152,6 +159,7 @@ from .businessinsider import BusinessInsiderIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
from .callin import CallinIE
|
||||
from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
@@ -202,10 +210,7 @@ from .ccc import (
|
||||
from .ccma import CCMAIE
|
||||
from .cctv import CCTVIE
|
||||
from .cda import CDAIE
|
||||
from .ceskatelevize import (
|
||||
CeskaTelevizeIE,
|
||||
CeskaTelevizePoradyIE,
|
||||
)
|
||||
from .ceskatelevize import CeskaTelevizeIE
|
||||
from .channel9 import Channel9IE
|
||||
from .charlierose import CharlieRoseIE
|
||||
from .chaturbate import ChaturbateIE
|
||||
@@ -253,6 +258,10 @@ from .commonprotocols import (
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
from .corus import CorusIE
|
||||
from .cpac import (
|
||||
CPACIE,
|
||||
CPACPlaylistIE,
|
||||
)
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
@@ -367,6 +376,8 @@ from .fc2 import (
|
||||
FC2EmbedIE,
|
||||
)
|
||||
from .fczenit import FczenitIE
|
||||
from .filemoon import FileMoonIE
|
||||
from .fifa import FifaIE
|
||||
from .filmon import (
|
||||
FilmOnIE,
|
||||
FilmOnChannelIE,
|
||||
@@ -469,6 +480,7 @@ from .hotstar import (
|
||||
)
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrfernsehen import HRFernsehenIE
|
||||
from .hrti import (
|
||||
HRTiIE,
|
||||
HRTiPlaylistIE,
|
||||
@@ -545,8 +557,10 @@ from .khanacademy import (
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .kommunetv import KommunetvIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .krasview import KrasViewIE
|
||||
from .kth import KTHIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
@@ -716,6 +730,7 @@ from .myvi import (
|
||||
MyviIE,
|
||||
MyviEmbedIE,
|
||||
)
|
||||
from .myvideoge import MyVideoGeIE
|
||||
from .myvidster import MyVidsterIE
|
||||
from .nationalgeographic import (
|
||||
NationalGeographicVideoIE,
|
||||
@@ -789,7 +804,14 @@ from .nick import (
|
||||
NickNightIE,
|
||||
NickRuIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .niconico import (
|
||||
NiconicoIE,
|
||||
NiconicoPlaylistIE,
|
||||
NiconicoUserIE,
|
||||
NicovideoSearchIE,
|
||||
NicovideoSearchDateIE,
|
||||
NicovideoSearchURLIE,
|
||||
)
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
@@ -893,6 +915,10 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peekvids import (
|
||||
PeekVidsIE,
|
||||
PlayVidsIE,
|
||||
)
|
||||
from .peertube import PeerTubeIE
|
||||
from .people import PeopleIE
|
||||
from .performgroup import PerformGroupIE
|
||||
@@ -986,6 +1012,10 @@ from .raywenderlich import (
|
||||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbgtum import (
|
||||
RbgTumIE,
|
||||
RbgTumCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import (
|
||||
@@ -1176,6 +1206,7 @@ from .storyfire import (
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streamsb import StreamsbIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
from .stretchinternet import StretchInternetIE
|
||||
from .stv import STVPlayerIE
|
||||
@@ -1245,6 +1276,11 @@ from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .thisvid import (
|
||||
ThisVidIE,
|
||||
ThisVidMemberIE,
|
||||
ThisVidPlaylistIE,
|
||||
)
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import (
|
||||
TikTokIE,
|
||||
@@ -1606,7 +1642,7 @@ from .youtube import (
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
#YoutubeSearchURLIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
@@ -1642,3 +1678,7 @@ from .zingmp3 import (
|
||||
)
|
||||
from .zoom import ZoomIE
|
||||
from .zype import ZypeIE
|
||||
from .pr0gramm import (
|
||||
Pr0grammIE,
|
||||
Pr0grammStaticIE,
|
||||
)
|
||||
|
101
youtube_dl/extractor/fifa.py
Normal file
101
youtube_dl/extractor/fifa.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
# Compatibility shim: when the imported InfoExtractor has no callable
# `_match_valid_url`, shadow it in this module with a subclass that adds one,
# so the extractor below can call it unconditionally.
if not callable(getattr(InfoExtractor, '_match_valid_url', None)):

    import re

    BaseInfoExtractor = InfoExtractor

    class InfoExtractor(BaseInfoExtractor):

        @classmethod
        def _match_valid_url(cls, url):
            # Match object for `url` against the class's _VALID_URL, or None
            pattern = cls._VALID_URL
            return re.match(pattern, url)
|
||||
|
||||
|
||||
class FifaIE(InfoExtractor):
    # Extractor for FIFA+ watch pages; the media itself is served by Uplynk.
    # The dots in the host name are escaped so that only www.fifa.com matches.
    _VALID_URL = r'https?://www\.fifa\.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y',
        'info_dict': {
            'id': '7on10qPcnyLajDDU3ntg6y',
            'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay',
            'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
            'duration': 8165,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV',
        'info_dict': {
            'id': '1cg5r5Qt6Qt12ilkDgb1sV',
            'title': 'Brazil v Germany | Semi-finals | 2014 FIFA World Cup Brazil™ | Extended Highlights',
            'description': 'md5:d908c74ee66322b804ae2e521b02a855',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Highlights'],
            'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB',
            'duration': 902,
            'release_timestamp': 1404777600,
            'release_date': '20140708',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp',
        'info_dict': {
            'id': '3C6gQH9C2DLwzNx7BMRQdp',
            'title': 'Josimar goal against Northern Ireland | Classic Goals',
            'description': 'md5:cbe7e7bb52f603c9f1fe9a4780fe983b',
            'ext': 'mp4',
            'categories': ['FIFA Tournaments', 'Goal'],
            'duration': 28,
            'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        # Returns the info dict for one video. The API host is read from the
        # page's rel="preconnect" link; formats come from the Uplynk preplay
        # manifest.
        video_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, video_id)

        preconnect_link = self._search_regex(
            r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')

        # This request is deliberately non-fatal, so its result may be
        # falsy: normalise to a dict so the metadata lookups below cannot
        # crash on a failed download.
        video_details = self._download_json(
            '%s/sections/videoDetails/%s' % (preconnect_link, video_id),
            video_id, 'Downloading Video Details', fatal=False) or {}

        preplay_parameters = self._download_json(
            '%s/videoPlayerData/%s' % (preconnect_link, video_id),
            video_id, 'Downloading Preplay Parameters')['preplayParameters']

        content_data = self._download_json(
            # 1. query string is expected to be sent as-is
            # 2. `sig` must be appended
            # 3. if absent, the call appears to work but the manifest is bad (404)
            'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
            video_id, 'Downloading Content Data')

        # Only formats are extracted from the manifest; no subtitles are
        # collected here, so 'subtitles' is returned as None.
        formats = self._extract_m3u8_formats(
            content_data['playURL'], video_id, ext='mp4', entry_protocol='m3u8_native')
        self._sort_formats(formats)

        # Best-effort title when the details request failed: the page's
        # og:title if present, else the video id.
        title = (
            video_details.get('title')
            or self._og_search_title(webpage, default=None)
            or video_id)

        return {
            'id': video_id,
            'title': title,
            'description': video_details.get('description'),
            'duration': int_or_none(video_details.get('duration')),
            'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')),
            'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)),
            'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')),
            'formats': formats,
            'subtitles': None,
        }
|
43
youtube_dl/extractor/filemoon.py
Normal file
43
youtube_dl/extractor/filemoon.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class FileMoonIE(InfoExtractor):
    # Extractor for filemoon.sx pages, whose JWPlayer setup call is hidden
    # inside a packed `eval(...)` script.
    _VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
    _TEST = {
        'url': 'https://filemoon.sx/e/dw40rxrzruqz',
        'md5': '5a713742f57ac4aef29b74733e8dda01',
        'info_dict': {
            'id': 'dw40rxrzruqz',
            'title': 'dw40rxrzruqz',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        # Returns the info dict (id, title, formats) for one video.
        # Raises ExtractorError when the page has no packed player script.
        # Imported here because this module's import block does not provide it.
        from ..utils import ExtractorError

        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        # The last `eval(...)` script on the page is taken as the packed
        # player code.
        matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
        if not matches:
            # A bare IndexError from matches[-1] would give the user nothing
            # to go on; report a proper extraction failure instead.
            raise ExtractorError('Unable to find packed player code', video_id=video_id)
        packed = matches[-1]
        unpacked = decode_packed_codes(packed)
        jwplayer_sources = self._parse_json(
            self._search_regex(
                r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
            video_id, transform_source=js_to_json)

        formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)

        return {
            'id': video_id,
            'title': self._generic_title(url) or video_id,
            'formats': formats
        }
|
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
@@ -35,6 +36,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -2227,6 +2229,97 @@ class GenericIE(InfoExtractor):
|
||||
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
|
||||
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
|
||||
'info_dict': {
|
||||
'id': '105',
|
||||
'display_id': 'kelis-4th-of-july',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kelis - 4th Of July',
|
||||
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://www.kvs-demo.com/embed/105/',
|
||||
'info_dict': {
|
||||
'id': '105',
|
||||
'display_id': 'kelis-4th-of-july',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kelis - 4th Of July / Embed Player',
|
||||
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# KVS Player (tested also in thisvid.py)
|
||||
'url': 'https://youix.com/video/leningrad-zoj/',
|
||||
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
|
||||
'info_dict': {
|
||||
'id': '18485',
|
||||
'display_id': 'leningrad-zoj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
|
||||
'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://youix.com/embed/18485',
|
||||
'md5': '94f96ba95706dc3880812b27b7d8a2b8',
|
||||
'info_dict': {
|
||||
'id': '18485',
|
||||
'display_id': 'leningrad-zoj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ленинград - ЗОЖ',
|
||||
'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player
|
||||
'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
|
||||
'md5': '94166bdb26b4cb1fb9214319a629fc51',
|
||||
'info_dict': {
|
||||
'id': '21217',
|
||||
'display_id': '40-nochey-2016',
|
||||
'ext': 'mp4',
|
||||
'title': '40 ночей (2016) - BogMedia.org',
|
||||
'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
|
||||
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
|
||||
},
|
||||
}, {
|
||||
# KVS Player (for sites that serve kt_player.js via non-https urls)
|
||||
'url': 'http://www.camhub.world/embed/389508',
|
||||
'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
|
||||
'info_dict': {
|
||||
'id': '389508',
|
||||
'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
|
||||
'ext': 'mp4',
|
||||
'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
|
||||
'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mrdeepfakes.com/video/5/selena-gomez-pov-deep-fakes',
|
||||
'md5': 'fec4ad5ec150f655e0c74c696a4a2ff4',
|
||||
'info_dict': {
|
||||
'id': '5',
|
||||
'display_id': 'selena-gomez-pov-deep-fakes',
|
||||
'ext': 'mp4',
|
||||
'title': 'Selena Gomez POV (Deep Fakes) DeepFake Porn - MrDeepFakes',
|
||||
'description': 'md5:17d1f84b578c9c26875ac5ef9a932354',
|
||||
'height': 720,
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
|
||||
'md5': 'e2f0a4c329f7986280b7328e24036d60',
|
||||
'info_dict': {
|
||||
'id': '284002',
|
||||
'display_id': 'just-out-of-the-shower-joi',
|
||||
'ext': 'mp4',
|
||||
'title': 'Just Out Of The Shower JOI - Shooshtime',
|
||||
'height': 720,
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@@ -2332,6 +2425,88 @@ class GenericIE(InfoExtractor):
|
||||
'title': title,
|
||||
}
|
||||
|
||||
def _extract_kvs(self, url, webpage, video_id):
    # Extract a video served by a KVS player page: read the `flashvars`
    # object from the page, de-obfuscate each video URL with the page's
    # licence code, and return id, display_id, title, thumbnail and formats.

    def getlicensetoken(license):
        # Derive the digit string that drives the character swaps below.
        digits = license.replace('$', '').replace('0', '1')
        center = len(digits) // 2
        front = int(digits[:center + 1])
        back = int(digits[center:])
        digits = compat_str(4 * abs(front - back))
        return ''.join(
            compat_str((int(license[o + i]) + int(digits[o])) % 10)
            for o in range(0, center + 1)
            for i in range(1, 5))

    def getrealurl(video_url, license_code):
        if not video_url.startswith('function/0/'):
            return video_url  # not obfuscated

        url_path, _, url_query = video_url.partition('?')
        urlparts = url_path.split('/')[2:]
        token = getlicensetoken(license_code)

        # Only the first 32 characters of the sixth path component are
        # scrambled. Walk them from last to first, swapping each position
        # with a partner position computed from the token.
        magic = list(urlparts[5][:32])
        for o in range(len(magic) - 1, -1, -1):
            partner = (o + sum(int(n) for n in token[o:])) % 32
            magic[o], magic[partner] = magic[partner], magic[o]

        urlparts[5] = ''.join(magic) + urlparts[5][32:]
        return '/'.join(urlparts) + '?' + url_query

    flashvars = self._parse_json(
        self._search_regex(
            r'(?s)<script\b[^>]*>.*?var\s+flashvars\s*=\s*(\{.+?\});.*?</script>',
            webpage, 'flashvars'),
        video_id, transform_source=js_to_json)

    # extract the part after the last / as the display_id from the
    # canonical URL.
    display_id = self._search_regex(
        r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
        r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
        webpage, 'display_id', fatal=False
    )
    title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')

    thumbnail = flashvars['preview_url']
    if thumbnail.startswith('//'):
        # protocol-relative: borrow the scheme ("https:") from the page URL
        thumbnail = url.partition('/')[0] + thumbnail

    is_url_key = re.compile(r'^video_(?:url|alt_url\d*)$').match
    formats = []
    for key in [k for k in flashvars.keys() if is_url_key(k)]:
        media_url = flashvars[key]
        if '/get_file/' not in media_url:
            continue
        format_id = flashvars.get(key + '_text', key)
        fmt = merge_dicts(
            parse_resolution(format_id) or parse_resolution(media_url), {
                'url': urljoin(url, getrealurl(media_url, flashvars['license_code'])),
                'format_id': format_id,
                'ext': 'mp4',
                'http_headers': {'Referer': url},
            })
        if not fmt.get('height'):
            # no resolution could be parsed: give it an explicit quality
            fmt['quality'] = 1
        formats.append(fmt)

    self._sort_formats(formats)

    return {
        'id': flashvars['video_id'],
        'display_id': display_id,
        'title': title,
        'thumbnail': thumbnail,
        'formats': formats,
    }
|
||||
|
||||
def _real_extract(self, url):
|
||||
if url.startswith('//'):
|
||||
return self.url_result(self.http_scheme() + url)
|
||||
@@ -2540,9 +2715,16 @@ class GenericIE(InfoExtractor):
|
||||
# but actually don't.
|
||||
AGE_LIMIT_MARKERS = [
|
||||
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
|
||||
r'>[^<]*you acknowledge you are at least (\d+) years old',
|
||||
r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
|
||||
]
|
||||
if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
|
||||
age_limit = 18
|
||||
for marker in AGE_LIMIT_MARKERS:
|
||||
m = re.search(marker, webpage)
|
||||
if not m:
|
||||
continue
|
||||
age_limit = max(
|
||||
age_limit or 0,
|
||||
int_or_none(m.groups() and m.group(1), default=18))
|
||||
|
||||
# video uploader is domain name
|
||||
video_uploader = self._search_regex(
|
||||
@@ -3389,6 +3571,20 @@ class GenericIE(InfoExtractor):
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
# Look for generic KVS player (before ld+json for tests)
|
||||
found = self._search_regex(
|
||||
(r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
|
||||
# kt_player('kt_player', 'https://i.shoosh.co/player/kt_player.swf?v=5.5.1', ...
|
||||
r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
|
||||
), webpage, 'KVS player', group='ver', default=False)
|
||||
if found:
|
||||
self.report_extraction('%s: KVS Player' % (video_id, ))
|
||||
if found.split('.')[0] not in ('4', '5', '6'):
|
||||
self.report_warning('Untested major version (%s) in player engine - download may fail.' % (found, ))
|
||||
return merge_dicts(
|
||||
self._extract_kvs(url, webpage, video_id),
|
||||
info_dict)
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
|
101
youtube_dl/extractor/hrfernsehen.py
Normal file
101
youtube_dl/extractor/hrfernsehen.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
unescapeHTML
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HRFernsehenIE(InfoExtractor):
    # Extractor for hr-fernsehen.de / hessenschau.de video pages. All media
    # information comes from the JSON in the page's
    # `data-new-hr-mediaplayer-loader` attribute.
    IE_NAME = 'hrfernsehen'
    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'

    _TESTS = [{
        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
        'md5': '5c4e0ba94677c516a2f65a84110fc536',
        'info_dict': {
            'id': '130546',
            'ext': 'mp4',
            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
            'subtitles': {'de': [{
                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
            }]},
            'timestamp': 1598470200,
            'upload_date': '20200826',
            'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
            'title': 'hessenschau vom 26.08.2020'
        }
    }, {
        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
        'only_matching': True
    }]

    _GEO_COUNTRIES = ['DE']

    def extract_airdate(self, loader_data):
        # Return the value of mediaMetadata.agf.airdate as a timestamp, or
        # None when absent. `or {}` (rather than a .get() default) also
        # covers intermediate keys that are present but JSON null.
        agf = (loader_data.get('mediaMetadata') or {}).get('agf') or {}
        airdate_str = agf.get('airdate')

        if airdate_str is None:
            return None

        return unified_timestamp(airdate_str)

    def extract_formats(self, loader_data):
        # Build and sort one format per entry of `videoResolutionLevels`.
        stream_formats = []
        # a missing or null list yields no formats instead of a KeyError
        for stream_obj in loader_data.get('videoResolutionLevels') or []:
            stream_format = {
                'format_id': str(stream_obj['verticalResolution']) + "p",
                'height': stream_obj['verticalResolution'],
                'url': stream_obj['url'],
            }

            # When the URL contains "<width>x<height>-<fps>p-<tbr>kbit",
            # those values refine (and, for height, override) the above.
            quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
                                            stream_obj['url'])
            if quality_information:
                stream_format['width'] = int_or_none(quality_information.group(1))
                stream_format['height'] = int_or_none(quality_information.group(2))
                stream_format['fps'] = int_or_none(quality_information.group(3))
                stream_format['tbr'] = int_or_none(quality_information.group(4))

            stream_formats.append(stream_format)

        self._sort_formats(stream_formats)
        return stream_formats

    def _real_extract(self, url):
        # Returns the info dict: id, title, description, formats, timestamp
        # and, when available, German subtitles and thumbnails.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            ['og:title', 'twitter:title', 'name'], webpage)
        description = self._html_search_meta(
            ['description'], webpage)

        loader_str = unescapeHTML(self._search_regex(r"data-new-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
        # _parse_json() is the framework's JSON parsing helper, used here in
        # place of a raw json.loads() so malformed JSON goes through the
        # extractor's own error handling.
        loader_data = self._parse_json(loader_str, video_id)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': self.extract_formats(loader_data),
            'timestamp': self.extract_airdate(loader_data)
        }

        if "subtitle" in loader_data:
            info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}

        # De-duplicate the preview images, dropping empty values.
        thumbnails = []
        for thumbnail_url in (loader_data.get("previewImageUrl") or {}).values():
            if thumbnail_url and thumbnail_url not in thumbnails:
                thumbnails.append(thumbnail_url)
        if thumbnails:
            info["thumbnails"] = [{"url": t} for t in thumbnails]

        return info
|
@@ -1,19 +1,29 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_filter as filter,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
determine_ext,
|
||||
error_to_compat_str,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,14 +32,102 @@ class IGNBaseIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
||||
|
||||
def _checked_call_api(self, slug):
    """Call the API for `slug`, reporting HTTP 404 as an expected error.

    Any other ExtractorError is re-raised unchanged.
    """
    try:
        return self._call_api(slug)
    except ExtractorError as err:
        http_error = err.cause
        is_not_found = (
            isinstance(http_error, compat_HTTPError)
            and http_error.code == 404)
        if not is_not_found:
            raise
        # Populate empty args from the error's own URL, code and reason.
        http_error.args = http_error.args or [
            http_error.geturl(), http_error.getcode(), http_error.reason]
        raise ExtractorError(
            'Content not found: expired?', cause=http_error,
            expected=True)
|
||||
|
||||
def _extract_video_info(self, video, fatal=True):
    """Build an info dict from a video description object.

    Formats are collected from `refs.m3uUrl` (HLS), `refs.f4mUrl` (HDS),
    each entry of `assets` and `system.mezzanineUrl`.

    With `fatal` true, a missing `videoId` raises KeyError and an empty
    format list is passed on to `_sort_formats`. With `fatal` false,
    None is returned when `videoId` is missing or no formats were found.
    """
    if fatal:
        video_id = video['videoId']
    else:
        # Non-fatal callers may pass an empty or partial object; give up
        # quietly instead of raising KeyError.
        video_id = video.get('videoId')
        if not video_id:
            return None

    formats = []
    refs = traverse_obj(video, 'refs', expected_type=dict) or {}

    m3u8_url = url_or_none(refs.get('m3uUrl'))
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    f4m_url = url_or_none(refs.get('f4mUrl'))
    if f4m_url:
        formats.extend(self._extract_f4m_formats(
            f4m_url, video_id, f4m_id='hds', fatal=False))

    for asset in (video.get('assets') or []):
        asset_url = url_or_none(asset.get('url'))
        if not asset_url:
            continue
        formats.append({
            'url': asset_url,
            'tbr': int_or_none(asset.get('bitrate'), 1000),
            'fps': int_or_none(asset.get('frame_rate')),
            'height': int_or_none(asset.get('height')),
            'width': int_or_none(asset.get('width')),
        })

    mezzanine_url = traverse_obj(
        video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
    if mezzanine_url:
        formats.append({
            'ext': determine_ext(mezzanine_url, 'mp4'),
            'format_id': 'mezzanine',
            'preference': 1,
            'url': mezzanine_url,
        })

    if formats or fatal:
        self._sort_formats(formats)
    else:
        return

    thumbnails = traverse_obj(
        video, ('thumbnails', Ellipsis, {'url': 'url'}), expected_type=url_or_none)
    tags = traverse_obj(
        video, ('tags', Ellipsis, 'displayName'),
        expected_type=lambda x: x.strip() or None)

    metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
    # First of longTitle/title/name that is non-empty after stripping.
    title = traverse_obj(
        metadata, 'longTitle', 'title', 'name',
        expected_type=lambda x: x.strip() or None)

    return {
        'id': video_id,
        'title': title,
        'description': strip_or_none(metadata.get('description')),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'thumbnails': thumbnails,
        'formats': formats,
        'tags': tags,
    }
|
||||
|
||||
# yt-dlp shim
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield a url_result for each distinct embed URL found in `webpage`.

    The URLs come from `cls._extract_embed_urls`. The result is tied to
    this extractor class unless `_VALID_URL` is False.
    """
    found_urls = cls._extract_embed_urls(url, webpage) or []
    for embed_url in orderedSet(found_urls, lazy=True):
        target_ie = cls if cls._VALID_URL is not False else None
        yield cls.url_result(embed_url, target_ie)
|
||||
|
||||
|
||||
class IGNIE(IGNBaseIE):
|
||||
"""
|
||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||
Some videos of it.ign.com are also supported
|
||||
"""
|
||||
|
||||
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
||||
_VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
|
||||
_PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
|
||||
_VALID_URL = (
|
||||
r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
|
||||
% '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
|
||||
IE_NAME = 'ign.com'
|
||||
_PAGE_TYPE = 'video'
|
||||
|
||||
@@ -44,7 +142,10 @@ class IGNIE(IGNBaseIE):
|
||||
'timestamp': 1370440800,
|
||||
'upload_date': '20130605',
|
||||
'tags': 'count:9',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'nocheckcertificate': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||
@@ -56,86 +157,51 @@ class IGNIE(IGNBaseIE):
|
||||
'timestamp': 1420571160,
|
||||
'upload_date': '20150106',
|
||||
'tags': 'count:4',
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Return absolute URLs of the /videos links in the first feed grid.

    Only the first `<section>` whose class list contains
    `content-feed-grid` is searched. Links that `urljoin` cannot resolve
    against `url` are filtered out.
    """
    grids = re.findall(
        r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
        webpage)
    first_grid = grids[0] if grids else ''
    link_re = (
        r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
        % cls._VIDEO_PATH_RE)
    resolved = (
        urljoin(url, link.group('path'))
        for link in re.finditer(link_re, first_grid))
    return filter(None, resolved)
|
||||
|
||||
def _real_extract(self, url):
    """Dispatch to single-video or playlist extraction.

    A URL whose `id` group matched is handled as a video; otherwise the
    `filt` group (or 'all' when it is empty) names the playlist.
    """
    matched = re.match(self._VALID_URL, url)
    video_slug = matched.group('id')
    if not video_slug:
        return self._extract_playlist(url, matched.group('filt') or 'all')
    return self._extract_video(url, video_slug)
|
||||
|
||||
def _extract_playlist(self, url, display_id):
    """Download the listing page and return its embed URLs as a playlist."""
    webpage = self._download_webpage(url, display_id)

    embed_urls = self._extract_embed_urls(url, webpage)
    entries = (
        self.url_result(embed_url, ie=self.ie_key())
        for embed_url in embed_urls)
    return self.playlist_result(entries, playlist_id=display_id)
|
||||
|
||||
def _extract_video(self, url, display_id):
|
||||
display_id = self._match_id(url)
|
||||
video = self._call_api(display_id)
|
||||
video_id = video['videoId']
|
||||
metadata = video['metadata']
|
||||
title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
|
||||
video = self._checked_call_api(display_id)
|
||||
|
||||
formats = []
|
||||
refs = video.get('refs') or {}
|
||||
info = self._extract_video_info(video)
|
||||
|
||||
m3u8_url = refs.get('m3uUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
f4m_url = refs.get('f4mUrl')
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
for asset in (video.get('assets') or []):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': asset_url,
|
||||
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
||||
'fps': int_or_none(asset.get('frame_rate')),
|
||||
'height': int_or_none(asset.get('height')),
|
||||
'width': int_or_none(asset.get('width')),
|
||||
})
|
||||
|
||||
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
|
||||
if mezzanine_url:
|
||||
formats.append({
|
||||
'ext': determine_ext(mezzanine_url, 'mp4'),
|
||||
'format_id': 'mezzanine',
|
||||
'preference': 1,
|
||||
'url': mezzanine_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail in (video.get('thumbnails') or []):
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
})
|
||||
|
||||
tags = []
|
||||
for tag in (video.get('tags') or []):
|
||||
display_name = tag.get('displayName')
|
||||
if not display_name:
|
||||
continue
|
||||
tags.append(display_name)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metadata.get('description')),
|
||||
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
return merge_dicts({
|
||||
'display_id': display_id,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
}, info)
|
||||
|
||||
|
||||
class IGNVideoIE(InfoExtractor):
|
||||
class IGNVideoIE(IGNBaseIE):
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
||||
_TESTS = [{
|
||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||
@@ -147,7 +213,8 @@ class IGNVideoIE(InfoExtractor):
|
||||
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
||||
'timestamp': 1444665600,
|
||||
'upload_date': '20151012',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 400: Bad Request'],
|
||||
}, {
|
||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||
'only_matching': True,
|
||||
@@ -167,22 +234,38 @@ class IGNVideoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
|
||||
url = self._request_webpage(req, video_id).geturl()
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
embed_url = compat_urlparse.urlunparse(
|
||||
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(embed_url, video_id)
|
||||
new_url = urlh.geturl()
|
||||
ign_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
|
||||
compat_urlparse.urlparse(new_url).query).get('url', [None])[-1]
|
||||
if ign_url:
|
||||
return self.url_result(ign_url, IGNIE.ie_key())
|
||||
return self.url_result(url)
|
||||
video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
|
||||
if not video:
|
||||
if new_url == url:
|
||||
raise ExtractorError('Redirect loop: ' + url)
|
||||
return self.url_result(new_url)
|
||||
video = extract_attributes(video)
|
||||
video_data = video.get('data-settings') or '{}'
|
||||
video_data = self._parse_json(video_data, video_id)['video']
|
||||
info = self._extract_video_info(video_data)
|
||||
|
||||
return merge_dicts({
|
||||
'display_id': video_id,
|
||||
}, info)
|
||||
|
||||
|
||||
class IGNArticleIE(IGNBaseIE):
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||
_PAGE_TYPE = 'article'
|
||||
_TESTS = [{
|
||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
'info_dict': {
|
||||
'id': '524497489e4e8ff5848ece34',
|
||||
'id': '72113',
|
||||
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||
},
|
||||
'playlist': [
|
||||
@@ -190,7 +273,7 @@ class IGNArticleIE(IGNBaseIE):
|
||||
'info_dict': {
|
||||
'id': '5ebbd138523268b93c9141af17bec937',
|
||||
'ext': 'mp4',
|
||||
'title': 'GTA 5 Video Review',
|
||||
'title': 'Grand Theft Auto V Video Review',
|
||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||
'timestamp': 1379339880,
|
||||
'upload_date': '20130916',
|
||||
@@ -200,7 +283,7 @@ class IGNArticleIE(IGNBaseIE):
|
||||
'info_dict': {
|
||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||
'ext': 'mp4',
|
||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||
'title': 'GTA 5 In Slow Motion',
|
||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||
'timestamp': 1386878820,
|
||||
'upload_date': '20131212',
|
||||
@@ -208,16 +291,17 @@ class IGNArticleIE(IGNBaseIE):
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'playlist_items': '2-3',
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Backend fetch failed'],
|
||||
}, {
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'info_dict': {
|
||||
'id': '53ee806780a81ec46e0790f8',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist_count': 1,
|
||||
'expected_warnings': ['Backend fetch failed'],
|
||||
}, {
|
||||
# videoId pattern
|
||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||
@@ -240,18 +324,91 @@ class IGNArticleIE(IGNBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _checked_call_api(self, slug):
    """Call the API for `slug`, softening HTTP 404 and 503 failures.

    A 404 is re-raised as an expected "content not found" error. A 503
    is reported as a warning and None is returned. Every other error
    propagates unchanged.
    """
    try:
        return self._call_api(slug)
    except ExtractorError as err:
        http_error = err.cause
        if not isinstance(http_error, compat_HTTPError):
            raise
        # Populate empty args from the error's own URL, code and reason.
        http_error.args = http_error.args or [
            http_error.geturl(), http_error.getcode(), http_error.reason]
        status = http_error.code
        if status == 404:
            raise ExtractorError(
                'Content not found: expired?', cause=http_error,
                expected=True)
        if status == 503:
            self.report_warning(error_to_compat_str(http_error))
            return
        raise
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, **kw):
    """Find and parse the `__NEXT_DATA__` script JSON in `webpage`.

    `fatal` applies to both the search and the JSON parsing.
    `transform_source` is passed to the JSON parser only. All remaining
    keyword arguments (e.g. `default`) go to the regex search only.
    Returns None when a non-fatal search finds nothing.
    """
    # Split the options: the two helpers do not accept the same keywords.
    transform_source = kw.pop('transform_source', None)
    fatal = kw.pop('fatal', True)
    next_data = self._search_regex(
        r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
        webpage, 'next.js data', fatal=fatal, **kw)
    if next_data is None:
        # Nothing matched and the search was non-fatal: nothing to parse.
        return None
    return self._parse_json(
        next_data, video_id, transform_source=transform_source, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
article = self._call_api(display_id)
|
||||
article = self._checked_call_api(display_id)
|
||||
|
||||
def entries():
|
||||
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
|
||||
if media_url:
|
||||
yield self.url_result(media_url, IGNIE.ie_key())
|
||||
for content in (article.get('content') or []):
|
||||
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
||||
yield self.url_result(video_url)
|
||||
if article:
|
||||
# obsolete ?
|
||||
def entries():
|
||||
media_url = traverse_obj(
|
||||
article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
|
||||
expected_type=url_or_none)
|
||||
if media_url:
|
||||
yield self.url_result(media_url, IGNIE.ie_key())
|
||||
for content in (article.get('content') or []):
|
||||
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
||||
if url_or_none(video_url):
|
||||
yield self.url_result(video_url)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), article.get('articleId'),
|
||||
traverse_obj(
|
||||
article, ('metadata', 'headline'),
|
||||
expected_type=lambda x: x.strip() or None))
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
|
||||
if playlist_id:
|
||||
|
||||
def entries():
|
||||
for m in re.finditer(
|
||||
r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
|
||||
webpage):
|
||||
flashvars = self._search_regex(
|
||||
r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
|
||||
m.group('params'), 'flashvars', default='')
|
||||
flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
|
||||
v_url = url_or_none((flashvars.get('url') or [None])[-1])
|
||||
if v_url:
|
||||
yield self.url_result(v_url)
|
||||
else:
|
||||
playlist_id = self._search_regex(
|
||||
r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
|
||||
webpage, 'id', group='id', default=None)
|
||||
|
||||
nextjs_data = self._search_nextjs_data(webpage, display_id)
|
||||
|
||||
def entries():
|
||||
for player in traverse_obj(
|
||||
nextjs_data,
|
||||
('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
|
||||
# skip promo links (which may not always be served, eg GH CI servers)
|
||||
if traverse_obj(nextjs_data,
|
||||
('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
|
||||
expected_type=dict):
|
||||
continue
|
||||
video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
|
||||
info = self._extract_video_info(video, fatal=False)
|
||||
if info:
|
||||
yield merge_dicts({
|
||||
'display_id': display_id,
|
||||
}, info)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), article.get('articleId'),
|
||||
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
|
||||
entries(), playlist_id or display_id,
|
||||
re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)
|
||||
|
@@ -1,6 +1,9 @@
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
@@ -90,7 +93,11 @@ class InfoQIE(BokeCCBaseIE):
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
try:
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
except ExtractorError:
|
||||
fields = {}
|
||||
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
@@ -3,123 +3,266 @@ from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_integer_types,
|
||||
compat_kwargs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
error_to_compat_str,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
JSON_LD_RE,
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class ITVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||
_GEO_COUNTRIES = ['GB']
|
||||
class ITVBaseIE(InfoExtractor):
    """Shared helpers for the ITV extractors.

    Overrides page downloading to send a plain browser User-agent and to
    turn an HTTP 403 geo-block response into a geo-restriction error.
    """

    def _search_nextjs_data(self, webpage, video_id, **kw):
        """Find and parse the `__NEXT_DATA__` script JSON in `webpage`.

        `fatal` applies to both the search and the parsing;
        `transform_source` goes to the parser only; any other keyword
        arguments go to the regex search only.
        """
        transform_source = kw.pop('transform_source', None)
        fatal = kw.pop('fatal', True)
        return self._parse_json(
            self._search_regex(
                r'''<script\b[^>]+\bid=('|")__NEXT_DATA__\1[^>]*>(?P<js>[^<]+)</script>''',
                webpage, 'next.js data', group='js', fatal=fatal, **kw),
            video_id, transform_source=transform_source, fatal=fatal)

    def __handle_request_webpage_error(self, err, video_id=None, errnote=None, fatal=True):
        """Report a failed page download.

        Returns False silently when `errnote` is False. Otherwise raises
        ExtractorError when `fatal` is true, or emits a warning and
        returns False.
        """
        if errnote is False:
            return False
        if errnote is None:
            errnote = 'Unable to download webpage'

        errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
        if fatal:
            raise ExtractorError(errmsg, sys.exc_info()[2], cause=err, video_id=video_id)
        else:
            self._downloader.report_warning(errmsg)
            return False

    @staticmethod
    def _vanilla_ua_header():
        """Return the minimal User-agent header sent with ITV requests."""
        return {'User-agent': 'Mozilla/5.0'}

    def _download_webpage_handle(self, url, video_id, *args, **kwargs):
        """Download a page with a vanilla User-agent and geo-block detection.

        403 is always added to the accepted statuses so the response body
        can be inspected: the geo-block message raises a geo-restriction
        error, and any other 403 is reported like a normal download error,
        honouring the caller's `errnote` and `fatal`.
        """
        # specialised to (a) use vanilla UA (b) detect geo-block
        params = self._downloader.params
        nkwargs = {}
        if (
                'user_agent' not in params
                and not any(re.match(r'(?i)user-agent\s*:', h)
                            for h in (params.get('headers') or []))
                and 'User-agent' not in (kwargs.get('headers') or {})):

            # Add the UA to a *copy* of the caller's headers: the caller's
            # other headers must survive, and the dict that was passed in
            # must not be mutated.
            headers = dict(kwargs.get('headers') or {})
            headers.update(self._vanilla_ua_header())
            kwargs['headers'] = headers
            nkwargs = kwargs
        if kwargs.get('expected_status') is not None:
            exp = kwargs['expected_status']
            if isinstance(exp, compat_integer_types):
                exp = [exp]
            if isinstance(exp, (list, tuple)) and 403 not in exp:
                kwargs['expected_status'] = [403]
                kwargs['expected_status'].extend(exp)
                nkwargs = kwargs
        else:
            kwargs['expected_status'] = 403
            nkwargs = kwargs

        if nkwargs:
            kwargs = compat_kwargs(kwargs)

        ret = super(ITVBaseIE, self)._download_webpage_handle(url, video_id, *args, **kwargs)
        if ret is False:
            return ret
        webpage, urlh = ret

        if urlh.getcode() == 403:
            # geo-block error is like this, with an unnecessary 'Of':
            # '{\n "Message" : "Request Originated Outside Of Allowed Geographic Region",\
            # \n "TransactionId" : "oas-magni-475082-xbYF0W"\n}'
            if '"Request Originated Outside Of Allowed Geographic Region"' in webpage:
                self.raise_geo_restricted(countries=['GB'])
            # errnote/fatal may arrive positionally or by keyword.
            # NOTE(review): assumes the base method's optional positionals
            # are (note, errnote, fatal, ...) - confirm against common.py.
            errnote = args[1] if len(args) > 1 else kwargs.get('errnote')
            fatal = args[2] if len(args) > 2 else kwargs.get('fatal', True)
            ret = self.__handle_request_webpage_error(
                compat_HTTPError(urlh.geturl(), 403, 'HTTP Error 403: Forbidden', urlh.headers, urlh),
                video_id=video_id, errnote=errnote, fatal=fatal)

        return ret
|
||||
|
||||
|
||||
class ITVIE(ITVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?:(?P<w>watch)|hub)/[^/]+/(?(w)[\w-]+/)(?P<id>\w+)'
|
||||
_IE_DESC = 'ITVX'
|
||||
_TESTS = [{
|
||||
'note': 'Hub URLs redirect to ITVX',
|
||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||
'info_dict': {
|
||||
'id': '2a4547a0012',
|
||||
'ext': 'mp4',
|
||||
'title': 'Liar - Series 2 - Episode 6',
|
||||
'description': 'md5:d0f91536569dec79ea184f0a44cca089',
|
||||
'series': 'Liar',
|
||||
'season_number': 2,
|
||||
'episode_number': 6,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable via data-playlist-url
|
||||
'note': 'Hub page unavailable via data-playlist-url (404 now)',
|
||||
'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# InvalidVodcrid
|
||||
'note': 'Hub page with InvalidVodcrid (404 now)',
|
||||
'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ContentUnavailable
|
||||
'note': 'Hub page with ContentUnavailable (404 now)',
|
||||
'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
|
||||
'only_matching': True,
|
||||
}]
|
||||
}, {
|
||||
'note': 'ITVX, or itvX, show',
|
||||
'url': 'https://www.itv.com/watch/vera/1a7314/1a7314a0014',
|
||||
'md5': 'bd0ad666b2c058fffe7d036785880064',
|
||||
'info_dict': {
|
||||
'id': '1a7314a0014',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vera - Series 3 - Episode 4 - Prodigal Son',
|
||||
'description': 'Vera and her team investigate the fatal stabbing of an ex-Met police officer outside a busy Newcastle nightclub - but there aren\'t many clues.',
|
||||
'timestamp': 1653591600,
|
||||
'upload_date': '20220526',
|
||||
'uploader': 'ITVX',
|
||||
'thumbnail': r're:https://\w+\.itv\.com/images/(?:\w+/)+\d+x\d+\?',
|
||||
'duration': 5340.8,
|
||||
'age_limit': 16,
|
||||
'series': 'Vera',
|
||||
'series_number': 3,
|
||||
'episode': 'Prodigal Son',
|
||||
'episode_number': 4,
|
||||
'channel': 'ITV3',
|
||||
'categories': list,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
# 'skip_download': True,
|
||||
},
|
||||
'skip': 'only available in UK',
|
||||
}, {
|
||||
'note': 'Latest ITV news bulletin: details change daily',
|
||||
'url': 'https://www.itv.com/watch/news/varies-but-is-not-checked/6js5d0f',
|
||||
'info_dict': {
|
||||
'id': '6js5d0f',
|
||||
'ext': 'mp4',
|
||||
'title': r're:The latest ITV News headlines - \S.+',
|
||||
'description': r'''re:.* today's top stories from the ITV News team.$''',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:2\d\d\d(?:0[1-9]|1[0-2])(?:[012][1-9]|3[01])',
|
||||
'uploader': 'ITVX',
|
||||
'thumbnail': r're:https://images\.ctfassets\.net/(?:\w+/)+[\w.]+\.(?:jpg|png)',
|
||||
'duration': float,
|
||||
'age_limit': None,
|
||||
},
|
||||
'params': {
|
||||
# variable download
|
||||
# 'skip_download': True,
|
||||
},
|
||||
'skip': 'only available in UK',
|
||||
}
|
||||
]
|
||||
|
||||
def _og_extract(self, webpage, require_title=False):
    """Collect title, description, thumbnail and uploader from OpenGraph tags.

    Only the title lookup can be fatal, and only when `require_title` is
    true; the other fields default to None.
    """
    og_info = {}
    og_info['title'] = self._og_search_title(webpage, fatal=require_title)
    og_info['description'] = self._og_search_description(webpage, default=None)
    og_info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
    og_info['uploader'] = self._og_search_property('site_name', webpage, default=None)
    return og_info
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
|
||||
hmac = params['data-video-hmac']
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
# now quite different params!
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'''(<[^>]+\b(?:class|data-testid)\s*=\s*("|')genie-container\2[^>]*>)''',
|
||||
webpage, 'params'))
|
||||
|
||||
ios_playlist_url = traverse_obj(
|
||||
params, 'data-video-id', 'data-video-playlist',
|
||||
get_all=False, expected_type=url_or_none)
|
||||
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Safari',
|
||||
'model': '5',
|
||||
'manufacturer': 'Mobile Safari',
|
||||
'model': '5.1',
|
||||
'os': {
|
||||
'name': 'Windows NT',
|
||||
'version': '6.1',
|
||||
'type': 'desktop'
|
||||
'name': 'iOS',
|
||||
'version': '5.0',
|
||||
'type': ' mobile'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
'id': 'browser',
|
||||
'supportsAdPods': True,
|
||||
'service': 'itv.x',
|
||||
'appversion': '2.43.28',
|
||||
},
|
||||
'variantAvailability': {
|
||||
'player': 'hls',
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes', 'outband-webvtt'],
|
||||
'max': ['hls', 'aes', 'outband-webvtt']
|
||||
},
|
||||
'platformTag': 'dotcom'
|
||||
'platformTag': 'mobile'
|
||||
}
|
||||
}).encode(), headers=headers)
|
||||
video_data = ios_playlist['Playlist']['Video']
|
||||
ios_base_url = video_data.get('Base')
|
||||
ios_base_url = traverse_obj(video_data, 'Base', expected_type=url_or_none)
|
||||
|
||||
media_url = (
|
||||
(lambda u: url_or_none(urljoin(ios_base_url, u)))
|
||||
if ios_base_url else url_or_none)
|
||||
|
||||
formats = []
|
||||
for media_file in (video_data.get('MediaFiles') or []):
|
||||
href = media_file.get('Href')
|
||||
for media_file in traverse_obj(video_data, 'MediaFiles', expected_type=list) or []:
|
||||
href = traverse_obj(media_file, 'Href', expected_type=media_url)
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
href, video_id, 'mp4', entry_protocol='m3u8',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {})
|
||||
f['http_headers'].update(self._vanilla_ua_header())
|
||||
|
||||
subtitles = {}
|
||||
subs = video_data.get('Subtitles') or []
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = url_or_none(sub.get('Href'))
|
||||
for sub in traverse_obj(video_data, 'Subtitles', expected_type=list) or []:
|
||||
href = traverse_obj(sub, 'Href', expected_type=url_or_none)
|
||||
if not href:
|
||||
continue
|
||||
subtitles.setdefault('en', []).append({
|
||||
@@ -127,59 +270,132 @@ class ITVIE(InfoExtractor):
|
||||
'ext': determine_ext(href, 'vtt'),
|
||||
})
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not info:
|
||||
json_ld = self._parse_json(self._search_regex(
|
||||
JSON_LD_RE, webpage, 'JSON-LD', '{}',
|
||||
group='json_ld'), video_id, fatal=False)
|
||||
if json_ld and json_ld.get('@type') == 'BreadcrumbList':
|
||||
for ile in (json_ld.get('itemListElement:') or []):
|
||||
item = ile.get('item:') or {}
|
||||
if item.get('@type') == 'TVEpisode':
|
||||
item['@context'] = 'http://schema.org'
|
||||
info = self._json_ld(item, video_id, fatal=False) or {}
|
||||
break
|
||||
next_data = self._search_nextjs_data(webpage, video_id, fatal=False, default='{}')
|
||||
video_data.update(traverse_obj(next_data, ('props', 'pageProps', ('title', 'episode')), expected_type=dict)[0] or {})
|
||||
title = traverse_obj(video_data, 'headerTitle', 'episodeTitle')
|
||||
info = self._og_extract(webpage, require_title=not title)
|
||||
tn = info.pop('thumbnail', None)
|
||||
if tn:
|
||||
info['thumbnails'] = [{'url': tn}]
|
||||
|
||||
# num. episode title
|
||||
num_ep_title = video_data.get('numberedEpisodeTitle')
|
||||
if not num_ep_title:
|
||||
num_ep_title = clean_html(get_element_by_attribute('data-testid', 'episode-hero-description-strong', webpage))
|
||||
num_ep_title = num_ep_title and num_ep_title.rstrip(' -')
|
||||
ep_title = strip_or_none(
|
||||
video_data.get('episodeTitle')
|
||||
or (num_ep_title.split('.', 1)[-1] if num_ep_title else None))
|
||||
title = title or re.sub(r'\s+-\s+ITVX$', '', info['title'])
|
||||
if ep_title and ep_title != title:
|
||||
title = title + ' - ' + ep_title
|
||||
|
||||
def get_thumbnails():
|
||||
tns = []
|
||||
for w, x in (traverse_obj(video_data, ('imagePresets'), expected_type=dict) or {}).items():
|
||||
if isinstance(x, dict):
|
||||
for y, z in x.items():
|
||||
tns.append({'id': w + '_' + y, 'url': z})
|
||||
return tns or None
|
||||
|
||||
video_str = lambda *x: traverse_obj(
|
||||
video_data, *x, get_all=False, expected_type=strip_or_none)
|
||||
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': parse_duration(video_data.get('Duration')),
|
||||
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
|
||||
# parsing hh:mm:ss:nnn not yet patched
|
||||
'duration': parse_duration(re.sub(r'(\d{2})(:)(\d{3}$)', r'\1.\3', video_data.get('Duration') or '')),
|
||||
'description': video_str('synopsis'),
|
||||
'timestamp': traverse_obj(video_data, 'broadcastDateTime', 'dateTime', expected_type=parse_iso8601),
|
||||
'thumbnails': get_thumbnails(),
|
||||
'series': video_str('showTitle', 'programmeTitle'),
|
||||
'series_number': int_or_none(video_data.get('seriesNumber')),
|
||||
'episode': ep_title,
|
||||
'episode_number': int_or_none((num_ep_title or '').split('.')[0]),
|
||||
'channel': video_str('channel'),
|
||||
'categories': traverse_obj(video_data, ('categories', 'formatted'), expected_type=list),
|
||||
'age_limit': {False: 16, True: 0}.get(video_data.get('isChildrenCategory')),
|
||||
}, info)
|
||||
|
||||
|
||||
class ITVBTCCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
||||
class ITVBTCCIE(ITVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/(?!(?:watch|hub)/)(?:[^/]+/)+(?P<id>[^/?#&]+)'
|
||||
_IE_DESC = 'ITV articles: News, British Touring Car Championship'
|
||||
_TESTS = [{
|
||||
'note': 'British Touring Car Championship',
|
||||
'url': 'https://www.itv.com/btcc/articles/btcc-2018-all-the-action-from-brands-hatch',
|
||||
'info_dict': {
|
||||
'id': 'btcc-2018-all-the-action-from-brands-hatch',
|
||||
'title': 'BTCC 2018: All the action from Brands Hatch',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
|
||||
}, {
|
||||
'note': 'redirects to /btcc/articles/...',
|
||||
'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'note': 'news article',
|
||||
'url': 'https://www.itv.com/news/wales/2020-07-23/sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
|
||||
'info_dict': {
|
||||
'id': 'sean-fletcher-shows-off-wales-coastline-in-new-itv-series-as-british-tourists-opt-for-staycations',
|
||||
'title': '''Sean Fletcher on why Wales' coastline should be your 'staycation' destination | ITV News''',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
|
||||
# should really be a class var of the BC IE
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_ACCOUNT = '1582188683001'
|
||||
BRIGHTCOVE_PLAYER = 'HkiHLnNRx'
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||
link = compat_urlparse.urlparse(urlh.geturl()).path.strip('/')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
|
||||
# ITV does not like some GB IP ranges, so here are some
|
||||
# IP blocks it accepts
|
||||
'geo_ip_blocks': [
|
||||
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
|
||||
],
|
||||
'referrer': url,
|
||||
}),
|
||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||
for video_id in re.findall(r'data-video-id=["\'](\d+)', webpage)]
|
||||
next_data = self._search_nextjs_data(webpage, playlist_id, fatal=False, default='{}')
|
||||
path_prefix = compat_urlparse.urlparse(next_data.get('assetPrefix') or '').path.strip('/')
|
||||
link = remove_start(link, path_prefix).strip('/')
|
||||
|
||||
content = traverse_obj(
|
||||
next_data, ('props', 'pageProps', Ellipsis),
|
||||
expected_type=lambda x: x if x['link'] == link else None,
|
||||
get_all=False, default={})
|
||||
content = traverse_obj(
|
||||
content, ('body', 'content', Ellipsis, 'data'),
|
||||
expected_type=lambda x: x if x.get('name') == 'Brightcove' or x.get('type') == 'Brightcove' else None)
|
||||
|
||||
contraband = {
|
||||
# ITV does not like some GB IP ranges, so here are some
|
||||
# IP blocks it accepts
|
||||
'geo_ip_blocks': [
|
||||
'193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
|
||||
],
|
||||
'referrer': urlh.geturl(),
|
||||
}
|
||||
|
||||
def entries():
|
||||
|
||||
for data in content or []:
|
||||
video_id = data.get('id')
|
||||
if not video_id:
|
||||
continue
|
||||
account = data.get('accountId') or self.BRIGHTCOVE_ACCOUNT
|
||||
player = data.get('playerId') or self.BRIGHTCOVE_PLAYER
|
||||
yield self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id), contraband),
|
||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||
|
||||
# obsolete ?
|
||||
for video_id in re.findall(r'''data-video-id=["'](\d+)''', webpage):
|
||||
yield self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (self.BRIGHTCOVE_ACCOUNT, self.BRIGHTCOVE_PLAYER, video_id), contraband),
|
||||
ie=BrightcoveNewIE.ie_key(), video_id=video_id)
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
return self.playlist_result(entries(), playlist_id, title)
|
||||
|
@@ -373,5 +373,5 @@ class KalturaIE(InfoExtractor):
|
||||
'duration': info.get('duration'),
|
||||
'timestamp': info.get('createdAt'),
|
||||
'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
|
||||
'view_count': info.get('plays'),
|
||||
'view_count': int_or_none(info.get('plays')),
|
||||
}
|
||||
|
35
youtube_dl/extractor/kommunetv.py
Normal file
35
youtube_dl/extractor/kommunetv.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import update_url
|
||||
|
||||
|
||||
class KommunetvIE(InfoExtractor):
    """Extractor for kommunetv.no archive pages.

    Handles URLs of the form https://<subdomain>.kommunetv.no/archive/<id>.
    Stream metadata is requested as JSON and the playlist it references is
    expanded into HLS formats.
    """
    # The dots of the host name are escaped so that they only match a
    # literal '.', not an arbitrary character
    _VALID_URL = r'https://(\w+)\.kommunetv\.no/archive/(?P<id>\w+)'
    _TEST = {
        'url': 'https://oslo.kommunetv.no/archive/921',
        'md5': '5f102be308ee759be1e12b63d5da4bbc',
        'info_dict': {
            'id': '921',
            'title': 'Bystyremøte',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        """Return the info dict (id, title, formats) for the archive page."""
        video_id = self._match_id(url)
        headers = {
            'Accept': 'application/json'
        }
        # NOTE(review): the API host is fixed to the Oslo instance although
        # _VALID_URL accepts any subdomain - confirm whether other
        # subdomains must be queried on their own host
        data = self._download_json(
            'https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id,
            video_id, headers=headers)
        title = data['stream']['title']
        # first entry of the first playlist carries the stream location
        playlist_file = data['playlist'][0]['playlist'][0]['file']
        # strip query string and fragment before fetching the manifest
        m3u8_url = update_url(playlist_file, query=None, fragment=None)
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': title
        }
|
31
youtube_dl/extractor/kth.py
Normal file
31
youtube_dl/extractor/kth.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class KTHIE(InfoExtractor):
    """Extractor for play.kth.se media pages.

    No media is extracted here: the ID taken from the page URL is handed
    to the Kaltura extractor through a smuggled ``kaltura:`` URL.
    """
    _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
    _TEST = {
        'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
        'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
        'info_dict': {
            'id': '0_uoop6oz9',
            'ext': 'mp4',
            'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
            'thumbnail': 're:https?://.+/thumbnail/.+',
            'duration': 3516,
            'timestamp': 1647345358,
            'upload_date': '20220315',
            'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
        }
    }

    def _real_extract(self, url):
        """Build a URL result that delegates extraction to 'Kaltura'."""
        entry_id = self._match_id(url)
        # extra data for the target extractor travels inside the URL
        smuggled_data = {
            'service_url': 'https://api.kaltura.nordu.net'}
        kaltura_url = smuggle_url('kaltura:308:%s' % entry_id, smuggled_data)
        return self.url_result(kaltura_url, 'Kaltura')
|
@@ -1,11 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -20,17 +25,20 @@ class ManyVidsIE(InfoExtractor):
|
||||
'id': '133957',
|
||||
'ext': 'mp4',
|
||||
'title': 'everthing about me (Preview)',
|
||||
'uploader': 'ellyxxix',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# full video
|
||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
|
||||
'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
|
||||
'info_dict': {
|
||||
'id': '935718',
|
||||
'ext': 'mp4',
|
||||
'title': 'MY FACE REVEAL',
|
||||
'description': 'md5:ec5901d41808b3746fed90face161612',
|
||||
'uploader': 'Sarah Calanthe',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
@@ -39,17 +47,50 @@ class ManyVidsIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
|
||||
try:
|
||||
webpage = self._download_webpage(real_url, video_id)
|
||||
except Exception:
|
||||
# probably useless fallback
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video URL', group='url')
|
||||
info = self._search_regex(
|
||||
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
|
||||
webpage, 'meta details', default='')
|
||||
info = extract_attributes(info)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True)
|
||||
player = self._search_regex(
|
||||
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
|
||||
webpage, 'player details', default='')
|
||||
player = extract_attributes(player)
|
||||
|
||||
video_urls_and_ids = (
|
||||
(info.get('data-meta-video'), 'video'),
|
||||
(player.get('data-video-transcoded'), 'transcoded'),
|
||||
(player.get('data-video-filepath'), 'filepath'),
|
||||
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
|
||||
)
|
||||
|
||||
def txt_or_none(s, default=None):
|
||||
return (s.strip() or default) if isinstance(s, compat_str) else default
|
||||
|
||||
uploader = txt_or_none(info.get('data-meta-author'))
|
||||
|
||||
def mung_title(s):
|
||||
if uploader:
|
||||
s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
|
||||
return txt_or_none(s)
|
||||
|
||||
title = (
|
||||
mung_title(info.get('data-meta-title'))
|
||||
or self._html_search_regex(
|
||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||
webpage, 'title', default=None)
|
||||
or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', fatal=True))
|
||||
|
||||
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
|
||||
|
||||
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||
title += ' (Preview)'
|
||||
@@ -62,7 +103,8 @@ class ManyVidsIE(InfoExtractor):
|
||||
# Sets some cookies
|
||||
self._download_webpage(
|
||||
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||
video_id, fatal=False, data=urlencode_postdata({
|
||||
video_id, note='Setting format cookies', fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'mvtoken': mv_token,
|
||||
'vid': video_id,
|
||||
}), headers={
|
||||
@@ -70,23 +112,56 @@ class ManyVidsIE(InfoExtractor):
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
})
|
||||
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
formats = [{'url': video_url}]
|
||||
formats = []
|
||||
for v_url, fmt in video_urls_and_ids:
|
||||
v_url = url_or_none(v_url)
|
||||
if not v_url:
|
||||
continue
|
||||
if determine_ext(v_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': v_url,
|
||||
'format_id': fmt,
|
||||
})
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
||||
'view count', default=None))
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
if f.get('height') is None:
|
||||
f['height'] = int_or_none(
|
||||
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
|
||||
if '/preview/' in f['url']:
|
||||
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
|
||||
f['preference'] = -10
|
||||
if 'transcoded' in f['format_id']:
|
||||
f['preference'] = f.get('preference', -1) - 1
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def get_likes():
|
||||
likes = self._search_regex(
|
||||
r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
|
||||
webpage, 'likes', default='')
|
||||
likes = extract_attributes(likes)
|
||||
return int_or_none(likes.get('data-likes'))
|
||||
|
||||
def get_views():
|
||||
return str_to_int(self._html_search_regex(
|
||||
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
|
||||
webpage, 'view count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'formats': formats,
|
||||
'description': txt_or_none(info.get('data-meta-description')),
|
||||
'uploader': txt_or_none(info.get('data-meta-author')),
|
||||
'thumbnail': (
|
||||
url_or_none(info.get('data-meta-image'))
|
||||
or url_or_none(player.get('data-video-screenshot'))),
|
||||
'view_count': get_views(),
|
||||
'like_count': get_likes(),
|
||||
}
|
||||
|
@@ -24,7 +24,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
player(?:/v\d+)?/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9A-Z]{16,})
|
||||
'''
|
||||
@@ -73,6 +73,10 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# embedUrl (from https://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/)
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323&autoplay=true&purl=http://www.wittytv.it/amici/est-ce-que-tu-maimes-gabriele-5-dicembre-copia/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'mediaset:FAFU000000665924',
|
||||
'only_matching': True,
|
||||
|
@@ -78,7 +78,7 @@ class MindsIE(MindsBaseIE):
|
||||
else:
|
||||
return self.url_result(entity['perma_url'])
|
||||
else:
|
||||
assert(entity['subtype'] == 'video')
|
||||
assert (entity['subtype'] == 'video')
|
||||
video_id = entity_id
|
||||
# 1080p and webm formats available only on the sources array
|
||||
video = self._call_api(
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
@@ -71,7 +72,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'title': 'a/ Hot Teens',
|
||||
'categories': list,
|
||||
'upload_date': '20210104',
|
||||
'uploader_id': 'yonbiw',
|
||||
'uploader_id': 'anonymous',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
@@ -125,9 +126,10 @@ class MotherlessIE(InfoExtractor):
|
||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
|
||||
comment_count = webpage.count('class="media-comment-contents"')
|
||||
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
||||
uploader_id = self._html_search_regex(
|
||||
r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
|
||||
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
|
||||
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
|
||||
webpage, 'uploader_id')
|
||||
|
||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||
@@ -169,7 +171,18 @@ class MotherlessGroupIE(InfoExtractor):
|
||||
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
|
||||
'any kind!'
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 0,
|
||||
'expected_warnings': [
|
||||
'This group has no videos.',
|
||||
]
|
||||
}, {
|
||||
'url': 'https://motherless.com/g/beautiful_cock',
|
||||
'info_dict': {
|
||||
'id': 'beautiful_cock',
|
||||
'title': 'Beautiful Cock',
|
||||
'description': 'Group for lovely cocks yours, mine, a friends anything human',
|
||||
},
|
||||
'playlist_mincount': 2500,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -208,16 +221,23 @@ class MotherlessGroupIE(InfoExtractor):
|
||||
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, fatal=False)
|
||||
page_count = self._int(self._search_regex(
|
||||
r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
|
||||
webpage, 'page_count'), 'page_count')
|
||||
page_count = str_to_int(self._search_regex(
|
||||
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
|
||||
webpage, 'page_count', default=0))
|
||||
if not page_count:
|
||||
message = self._search_regex(
|
||||
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
|
||||
webpage, 'error_msg', default=None) or 'This group has no videos.'
|
||||
self.report_warning(message, group_id)
|
||||
page_count = 1
|
||||
PAGE_SIZE = 80
|
||||
|
||||
def _get_page(idx):
|
||||
webpage = self._download_webpage(
|
||||
page_url, group_id, query={'page': idx + 1},
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||
)
|
||||
if idx > 0:
|
||||
webpage = self._download_webpage(
|
||||
page_url, group_id, query={'page': idx + 1},
|
||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||
)
|
||||
for entry in self._extract_entries(webpage, url):
|
||||
yield entry
|
||||
|
||||
|
@@ -35,7 +35,9 @@ class MySpassIE(InfoExtractor):
|
||||
title = xpath_text(metadata, 'title', fatal=True)
|
||||
video_url = xpath_text(metadata, 'url_flv', 'download url', True)
|
||||
video_id_int = int(video_id)
|
||||
for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
|
||||
|
||||
grps = re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url)
|
||||
for group in grps.groups() if grps else []:
|
||||
group_int = int(group)
|
||||
if group_int > video_id_int:
|
||||
video_url = video_url.replace(
|
||||
|
87
youtube_dl/extractor/myvideoge.py
Normal file
87
youtube_dl/extractor/myvideoge.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_id,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
MONTH_NAMES,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class MyVideoGeIE(InfoExtractor):
    """Extractor for myvideo.ge video pages (https://www.myvideo.ge/v/<id>).

    Formats come from the ``sources`` array of the page's jwplayer setup
    call; the remaining metadata is scraped from the page markup.
    """
    _VALID_URL = r'https?://(?:www\.)?myvideo\.ge/v/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.myvideo.ge/v/3941048',
        'md5': '8c192a7d2b15454ba4f29dc9c9a52ea9',
        'info_dict': {
            'id': '3941048',
            'ext': 'mp4',
            'title': 'The best prikol',
            'upload_date': '20200611',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'chixa33',
            'description': 'md5:5b067801318e33c2e6eea4ab90b1fdd3',
        },
        # working from local dev system
        'skip': 'site blocks CI servers',
    }
    # ka month names, in calendar order so that positions line up with
    # MONTH_NAMES['en']
    _MONTH_NAMES_KA = ['იანვარი', 'თებერვალი', 'მარტი', 'აპრილი', 'მაისი', 'ივნისი', 'ივლისი', 'აგვისტო', 'სექტემბერი', 'ოქტომბერი', 'ნოემბერი', 'დეკემბერი']

    # ranks the format labels: 'HD' is preferred over 'SD'
    _quality = staticmethod(qualities(('SD', 'HD')))

    def _real_extract(self, url):
        """Return the info dict for the video page at `url`."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = (
            self._og_search_title(webpage, default=None)
            or clean_html(get_element_by_class('my_video_title', webpage))
            or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title\b', webpage, 'title'))

        # a missing sources array is tolerated here: both the regex search
        # and the JSON parse are non-fatal
        jwplayer_sources = self._parse_json(
            self._search_regex(
                r'''(?s)jwplayer\s*\(\s*['"]mvplayer['"]\s*\)\s*\.\s*setup\s*\(.*?\bsources\s*:\s*(\[.*?])\s*[,});]''', webpage, 'jwplayer sources', fatal=False)
            or '',
            video_id, transform_source=js_to_json, fatal=False)

        formats = self._parse_jwplayer_formats(jwplayer_sources or [], video_id)
        for f in formats or []:
            f['preference'] = self._quality(f['format_id'])
        self._sort_formats(formats)

        description = (
            self._og_search_description(webpage)
            or get_element_by_id('long_desc_holder', webpage)
            or self._html_search_meta('description', webpage))

        uploader = self._search_regex(r'<a[^>]+class="mv_user_name"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)

        upload_date = get_element_by_class('mv_vid_upl_date', webpage)

        def ka_month_to_en(mobj):
            # translate a matched ka month name to the en one; as the match
            # is case-insensitive, also try the lower-cased text and leave
            # the text unchanged if neither is in the table
            matched = mobj.group(0)
            for candidate in (matched, matched.lower()):
                if candidate in self._MONTH_NAMES_KA:
                    return MONTH_NAMES['en'][self._MONTH_NAMES_KA.index(candidate)]
            return matched

        # as ka locale may not be present roll a local date conversion
        # NB the flag must go to re.compile(): the 4th positional argument
        # of re.sub() is `count`, not `flags`
        upload_date = (unified_strdate(
            re.compile('|'.join(self._MONTH_NAMES_KA), re.I).sub(
                ka_month_to_en, upload_date))
            if upload_date else None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'upload_date': upload_date,
            'view_count': int_or_none(get_element_by_class('mv_vid_views', webpage)),
            'like_count': int_or_none(get_element_by_id('likes_count', webpage)),
            'dislike_count': int_or_none(get_element_by_id('dislikes_count', webpage)),
        }
|
@@ -4,8 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
@@ -20,13 +22,13 @@ class NDRBaseIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = next(group for group in mobj.groups() if group)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return self._extract_embed(webpage, display_id)
|
||||
return self._extract_embed(webpage, display_id, url)
|
||||
|
||||
|
||||
class NDRIE(NDRBaseIE):
|
||||
IE_NAME = 'ndr'
|
||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||
_TESTS = [{
|
||||
# httpVideo, same content id
|
||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||
@@ -38,13 +40,14 @@ class NDRIE(NDRBaseIE):
|
||||
'title': 'Party, Pötte und Parade',
|
||||
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
||||
'uploader': 'ndrtv',
|
||||
'timestamp': 1431108900,
|
||||
'timestamp': 1431255671,
|
||||
'upload_date': '20150510',
|
||||
'duration': 3498,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# httpVideo, different content id
|
||||
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
||||
@@ -63,6 +66,7 @@ class NDRIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpAudio, same content id
|
||||
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
||||
@@ -74,8 +78,8 @@ class NDRIE(NDRBaseIE):
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||
'uploader': 'ndrinfo',
|
||||
'timestamp': 1290626100,
|
||||
'upload_date': '20140729',
|
||||
'timestamp': 1631711863,
|
||||
'upload_date': '20210915',
|
||||
'duration': 884,
|
||||
},
|
||||
'params': {
|
||||
@@ -89,9 +93,10 @@ class NDRIE(NDRBaseIE):
|
||||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||
'description': 'md5:700f6de264010585012a72f97b0ac0c9',
|
||||
'uploader': 'ndrtv',
|
||||
'upload_date': '20201113',
|
||||
'upload_date': '20201207',
|
||||
'timestamp': 1614349457,
|
||||
'duration': 1749,
|
||||
'subtitles': {
|
||||
'de': [{
|
||||
@@ -109,19 +114,38 @@ class NDRIE(NDRBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
embed_url = self._html_search_meta(
|
||||
'embedURL', webpage, 'embed URL',
|
||||
default=None) or self._search_regex(
|
||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url')
|
||||
def _extract_embed(self, webpage, display_id, url):
|
||||
embed_url = (
|
||||
self._html_search_meta(
|
||||
'embedURL', webpage, 'embed URL',
|
||||
default=None)
|
||||
or self._search_regex(
|
||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url', default=None)
|
||||
or self._search_regex(
|
||||
r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url', default=''))
|
||||
# some more work needed if we only found sophoraID
|
||||
if re.match(r'^[a-z]+\d+$', embed_url):
|
||||
# get the initial part of the url path, e.g. /panorama/archiv/2022/
|
||||
parsed_url = compat_urllib_parse_urlparse(url)
|
||||
path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
|
||||
# find tell-tale image with the actual ID
|
||||
ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
|
||||
# or try to use special knowledge!
|
||||
NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
|
||||
embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
|
||||
if not embed_url:
|
||||
raise ExtractorError('Unable to extract embedUrl')
|
||||
|
||||
description = self._search_regex(
|
||||
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(
|
||||
self._search_regex(
|
||||
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
|
||||
webpage, 'upload date', default=None))
|
||||
(r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
|
||||
r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
|
||||
webpage, 'upload date', group='cont', default=None))
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
return merge_dicts({
|
||||
'_type': 'url_transparent',
|
||||
@@ -153,19 +177,19 @@ class NJoyIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpVideo, different content id
|
||||
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
||||
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
||||
'info_dict': {
|
||||
'id': 'dockville882',
|
||||
'id': 'livestream283',
|
||||
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
||||
'ext': 'mp4',
|
||||
'title': '"Ich hab noch nie" mit Felix Jaehn',
|
||||
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
|
||||
'ext': 'mp3',
|
||||
'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
|
||||
'description': 'md5:681698f527b8601e511e7b79edde7d2c',
|
||||
'uploader': 'njoy',
|
||||
'upload_date': '20150822',
|
||||
'duration': 211,
|
||||
'upload_date': '20210830',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -175,18 +199,25 @@ class NJoyIE(NDRBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
def _extract_embed(self, webpage, display_id, url=None):
|
||||
# find tell-tale URL with the actual ID, or ...
|
||||
video_id = self._search_regex(
|
||||
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
|
||||
description = self._search_regex(
|
||||
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||
webpage, 'description', fatal=False)
|
||||
(r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
|
||||
r'<iframe[^>]+id="pp_([\da-z]+)"', ),
|
||||
webpage, 'NDR id', default=None)
|
||||
|
||||
description = (
|
||||
self._html_search_meta('description', webpage)
|
||||
or self._search_regex(
|
||||
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||
webpage, 'description', fatal=False))
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'NDREmbedBase',
|
||||
'url': 'ndr:%s' % video_id,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'title': display_id.replace('-', ' ').strip(),
|
||||
}
|
||||
|
||||
|
||||
@@ -291,7 +322,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
|
||||
class NDREmbedIE(NDREmbedBaseIE):
|
||||
IE_NAME = 'ndr:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||
@@ -304,6 +335,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'upload_date': '20150907',
|
||||
'duration': 132,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
||||
'md5': '002085c44bae38802d94ae5802a36e78',
|
||||
@@ -319,6 +351,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||
@@ -328,7 +361,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'title': 'La Valette entgeht der Hinrichtung',
|
||||
'is_live': False,
|
||||
'uploader': 'ndrinfo',
|
||||
'upload_date': '20140729',
|
||||
'upload_date': '20210915',
|
||||
'duration': 884,
|
||||
},
|
||||
'params': {
|
||||
@@ -349,15 +382,17 @@ class NDREmbedIE(NDREmbedBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpVideoLive
|
||||
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
||||
'info_dict': {
|
||||
'id': 'livestream217',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
'upload_date': '20150910',
|
||||
'upload_date': '20210409',
|
||||
'uploader': 'ndrtv',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -395,9 +430,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
||||
'is_live': False,
|
||||
'upload_date': '20150807',
|
||||
'upload_date': '20200826',
|
||||
'duration': 1011,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
# httpAudio
|
||||
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
||||
@@ -414,6 +450,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
}, {
|
||||
# httpAudioLive, no explicit ext
|
||||
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
||||
@@ -423,7 +460,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
||||
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'is_live': True,
|
||||
'uploader': 'njoy',
|
||||
'upload_date': '20150810',
|
||||
'upload_date': '20210830',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@@ -1,20 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from hashlib import md5
|
||||
from base64 import b64encode
|
||||
from binascii import hexlify
|
||||
from datetime import datetime
|
||||
from hashlib import md5
|
||||
from random import randint
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_encrypt, pkcs7_padding
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
compat_itertools_count,
|
||||
)
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
error_to_compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,32 +47,106 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
result = b64encode(m.digest()).decode('ascii')
|
||||
return result.replace('/', '_').replace('+', '-')
|
||||
|
||||
@classmethod
|
||||
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
|
||||
KEY = b'e82ckenh8dichen8'
|
||||
URL = '/api/song/enhance/player/url'
|
||||
now = int(time.time() * 1000)
|
||||
rand = randint(0, 1000)
|
||||
cookie = {
|
||||
'osver': None,
|
||||
'deviceId': None,
|
||||
'appver': '8.0.0',
|
||||
'versioncode': '140',
|
||||
'mobilename': None,
|
||||
'buildver': '1623435496',
|
||||
'resolution': '1920x1080',
|
||||
'__csrf': '',
|
||||
'os': 'pc',
|
||||
'channel': None,
|
||||
'requestId': '{0}_{1:04}'.format(now, rand),
|
||||
}
|
||||
request_text = json.dumps(
|
||||
{'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
|
||||
separators=(',', ':'))
|
||||
message = 'nobody{0}use{1}md5forencrypt'.format(
|
||||
URL, request_text).encode('latin1')
|
||||
msg_digest = md5(message).hexdigest()
|
||||
|
||||
data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
|
||||
URL, request_text, msg_digest)
|
||||
data = pkcs7_padding(bytes_to_intlist(data))
|
||||
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
|
||||
encrypted_params = hexlify(encrypted).decode('ascii').upper()
|
||||
|
||||
cookie = '; '.join(
|
||||
['{0}={1}'.format(k, v if v is not None else 'undefined')
|
||||
for [k, v] in cookie.items()])
|
||||
|
||||
headers = {
|
||||
'User-Agent': std_headers['User-Agent'],
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': 'https://music.163.com',
|
||||
'Cookie': cookie,
|
||||
}
|
||||
return ('params={0}'.format(encrypted_params), headers)
|
||||
|
||||
def _call_player_api(self, song_id, bitrate):
|
||||
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
|
||||
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
|
||||
try:
|
||||
msg = 'empty result'
|
||||
result = self._download_json(
|
||||
url, song_id, data=data.encode('ascii'), headers=headers)
|
||||
if result:
|
||||
return result
|
||||
except ExtractorError as e:
|
||||
if type(e.cause) in (ValueError, TypeError):
|
||||
# JSON load failure
|
||||
raise
|
||||
except Exception as e:
|
||||
msg = error_to_compat_str(e)
|
||||
self.report_warning('%s API call (%s) failed: %s' % (
|
||||
song_id, bitrate, msg))
|
||||
return {}
|
||||
|
||||
def extract_formats(self, info):
|
||||
err = 0
|
||||
formats = []
|
||||
song_id = info['id']
|
||||
for song_format in self._FORMATS:
|
||||
details = info.get(song_format)
|
||||
if not details:
|
||||
continue
|
||||
song_file_path = '/%s/%s.%s' % (
|
||||
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
|
||||
|
||||
# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
|
||||
# from NetEase's CDN provider that can be used if m5.music.126.net does not
|
||||
# work, especially for users outside of Mainland China
|
||||
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
|
||||
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
|
||||
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
|
||||
song_url = host + song_file_path
|
||||
bitrate = int_or_none(details.get('bitrate')) or 999000
|
||||
data = self._call_player_api(song_id, bitrate)
|
||||
for song in try_get(data, lambda x: x['data'], list) or []:
|
||||
song_url = try_get(song, lambda x: x['url'])
|
||||
if not song_url:
|
||||
continue
|
||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
||||
formats.append({
|
||||
'url': song_url,
|
||||
'ext': details.get('extension'),
|
||||
'abr': float_or_none(details.get('bitrate'), scale=1000),
|
||||
'abr': float_or_none(song.get('br'), scale=1000),
|
||||
'format_id': song_format,
|
||||
'filesize': details.get('size'),
|
||||
'asr': details.get('sr')
|
||||
'filesize': int_or_none(song.get('size')),
|
||||
'asr': int_or_none(details.get('sr')),
|
||||
})
|
||||
break
|
||||
elif err == 0:
|
||||
err = try_get(song, lambda x: x['code'], int)
|
||||
|
||||
if not formats:
|
||||
msg = 'No media links found'
|
||||
if err != 0 and (err < 200 or err >= 400):
|
||||
raise ExtractorError(
|
||||
'%s (site code %d)' % (msg, err, ), expected=True)
|
||||
else:
|
||||
self.raise_geo_restricted(
|
||||
msg + ': probably this video is not available from your location due to geo restriction.',
|
||||
countries=['CN'])
|
||||
|
||||
return formats
|
||||
|
||||
@classmethod
|
||||
@@ -76,33 +162,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
IE_NAME = 'netease:song'
|
||||
IE_DESC = '网易云音乐'
|
||||
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://music.163.com/#/song?id=32102397',
|
||||
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
|
||||
'md5': '3e909614ce09b1ccef4a3eb205441190',
|
||||
'info_dict': {
|
||||
'id': '32102397',
|
||||
'ext': 'mp3',
|
||||
'title': 'Bad Blood (feat. Kendrick Lamar)',
|
||||
'title': 'Bad Blood',
|
||||
'creator': 'Taylor Swift / Kendrick Lamar',
|
||||
'upload_date': '20150517',
|
||||
'timestamp': 1431878400,
|
||||
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
|
||||
'upload_date': '20150516',
|
||||
'timestamp': 1431792000,
|
||||
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'No lyrics translation.',
|
||||
'url': 'http://music.163.com/#/song?id=29822014',
|
||||
'info_dict': {
|
||||
'id': '29822014',
|
||||
'ext': 'mp3',
|
||||
'title': '听见下雨的声音',
|
||||
'creator': '周杰伦',
|
||||
'upload_date': '20141225',
|
||||
'timestamp': 1419523200,
|
||||
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'No lyrics.',
|
||||
'url': 'http://music.163.com/song?id=17241424',
|
||||
@@ -112,9 +184,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'title': 'Opus 28',
|
||||
'creator': 'Dustin O\'Halloran',
|
||||
'upload_date': '20080211',
|
||||
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
|
||||
'timestamp': 1202745600,
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'note': 'Has translated name.',
|
||||
'url': 'http://music.163.com/#/song?id=22735043',
|
||||
@@ -128,7 +200,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||
'timestamp': 1264608000,
|
||||
'alt_title': '说出愿望吧(Genie)',
|
||||
},
|
||||
'skip': 'Blocked outside Mainland China',
|
||||
}, {
|
||||
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||
'md5': '95826c73ea50b1c288b22180ec9e754d',
|
||||
'info_dict': {
|
||||
'id': '95670',
|
||||
'ext': 'mp3',
|
||||
'title': '国际歌',
|
||||
'creator': '马备',
|
||||
'upload_date': '19911130',
|
||||
'timestamp': 691516800,
|
||||
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
||||
},
|
||||
}]
|
||||
|
||||
def _process_lyrics(self, lyrics_info):
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -7,7 +8,7 @@ from ..utils import urljoin
|
||||
|
||||
|
||||
class NhkBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
|
||||
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
|
||||
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
|
||||
_TYPE_REGEX = r'/(?P<type>video|audio)/'
|
||||
|
||||
@@ -23,7 +24,7 @@ class NhkBaseIE(InfoExtractor):
|
||||
def _extract_episode_info(self, url, episode=None):
|
||||
fetch_episode = episode is None
|
||||
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||
if episode_id.isdigit():
|
||||
if len(episode_id) == 7:
|
||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||
|
||||
is_video = m_type == 'video'
|
||||
@@ -84,7 +85,8 @@ class NhkBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class NhkVodIE(NhkBaseIE):
|
||||
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
|
||||
_VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
@@ -124,6 +126,19 @@ class NhkVodIE(NhkBaseIE):
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video, alphabetic character in ID #29670
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': 'qfjay6cg',
|
||||
'ext': 'mp4',
|
||||
'title': 'DESIGN TALKS plus - Fishermen’s Finery',
|
||||
'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
|
||||
'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
|
||||
'upload_date': '20210615',
|
||||
'timestamp': 1623722008,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -2,25 +2,28 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
PostProcessingError,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
@@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'md5': 'a5bad06f1347452102953f323c69da34s',
|
||||
'info_dict': {
|
||||
'id': 'sm22312215',
|
||||
'ext': 'mp4',
|
||||
@@ -157,11 +160,34 @@ class NiconicoIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DMC video with heartbeat
|
||||
'url': 'https://www.nicovideo.jp/watch/sm34815188',
|
||||
'md5': '9360c6e1f1519d7759e2fe8e1326ae83',
|
||||
'info_dict': {
|
||||
'id': 'sm34815188',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:aee93e9f3366db72f902f6cd5d389cb7',
|
||||
'description': 'md5:7b9149fc7a00ab053cafaf5c19662704',
|
||||
'thumbnail': r're:https?://.*',
|
||||
'uploader': 'md5:2762e18fa74dbb40aa1ad27c6291ee32',
|
||||
'uploader_id': '67449889',
|
||||
'upload_date': '20190322',
|
||||
'timestamp': int, # timestamp is unstable
|
||||
'duration': 1082.0,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0'
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -191,37 +217,89 @@ class NiconicoIE(InfoExtractor):
|
||||
self._downloader.report_warning('unable to log in: bad username or password')
|
||||
return login_ok
|
||||
|
||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||
def yesno(boolean):
|
||||
return 'yes' if boolean else 'no'
|
||||
def _get_heartbeat_info(self, info_dict):
|
||||
|
||||
session_api_data = api_data['video']['dmcInfo']['session_api']
|
||||
session_api_endpoint = session_api_data['urls'][0]
|
||||
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||
|
||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||
api_data = (
|
||||
info_dict.get('_api_data')
|
||||
or self._parse_json(
|
||||
self._html_search_regex(
|
||||
'data-api-data="([^"]+)"',
|
||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||
'API data', default='{}'),
|
||||
video_id))
|
||||
|
||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||
|
||||
def ping():
|
||||
status = try_get(
|
||||
self._download_json(
|
||||
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
|
||||
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
|
||||
note='Acquiring permission for downloading video',
|
||||
headers=self._API_HEADERS),
|
||||
lambda x: x['meta']['status'])
|
||||
if status != 200:
|
||||
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
|
||||
|
||||
yesno = lambda x: 'yes' if x else 'no'
|
||||
|
||||
# m3u8 (encryption)
|
||||
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
|
||||
protocol = 'm3u8'
|
||||
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
|
||||
session_api_http_parameters = {
|
||||
'parameters': {
|
||||
'hls_parameters': {
|
||||
'encryption': {
|
||||
encryption: {
|
||||
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
|
||||
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
|
||||
}
|
||||
},
|
||||
'transfer_preset': '',
|
||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||
'segment_duration': 6000,
|
||||
}
|
||||
}
|
||||
}
|
||||
# http
|
||||
else:
|
||||
protocol = 'http'
|
||||
session_api_http_parameters = {
|
||||
'parameters': {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
session_response = self._download_json(
|
||||
session_api_endpoint['url'], video_id,
|
||||
query={'_format': 'json'},
|
||||
headers={'Content-Type': 'application/json'},
|
||||
note='Downloading JSON metadata for %s' % format_id,
|
||||
note='Downloading JSON metadata for %s' % info_dict['format_id'],
|
||||
data=json.dumps({
|
||||
'session': {
|
||||
'client_info': {
|
||||
'player_id': session_api_data['player_id'],
|
||||
'player_id': session_api_data.get('playerId'),
|
||||
},
|
||||
'content_auth': {
|
||||
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
|
||||
'content_key_timeout': session_api_data['content_key_timeout'],
|
||||
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||
'service_id': 'nicovideo',
|
||||
'service_user_id': session_api_data['service_user_id']
|
||||
'service_user_id': session_api_data.get('serviceUserId')
|
||||
},
|
||||
'content_id': session_api_data['content_id'],
|
||||
'content_id': session_api_data.get('contentId'),
|
||||
'content_src_id_sets': [{
|
||||
'content_src_ids': [{
|
||||
'src_id_to_mux': {
|
||||
'audio_src_ids': [audio_quality['id']],
|
||||
'video_src_ids': [video_quality['id']],
|
||||
'audio_src_ids': [audio_src_id],
|
||||
'video_src_ids': [video_src_id],
|
||||
}
|
||||
}]
|
||||
}],
|
||||
@@ -229,52 +307,81 @@ class NiconicoIE(InfoExtractor):
|
||||
'content_uri': '',
|
||||
'keep_method': {
|
||||
'heartbeat': {
|
||||
'lifetime': session_api_data['heartbeat_lifetime']
|
||||
'lifetime': session_api_data.get('heartbeatLifetime')
|
||||
}
|
||||
},
|
||||
'priority': session_api_data['priority'],
|
||||
'priority': session_api_data.get('priority'),
|
||||
'protocol': {
|
||||
'name': 'http',
|
||||
'parameters': {
|
||||
'http_parameters': {
|
||||
'parameters': {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
||||
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
||||
}
|
||||
}
|
||||
}
|
||||
'http_parameters': session_api_http_parameters
|
||||
}
|
||||
},
|
||||
'recipe_id': session_api_data['recipe_id'],
|
||||
'recipe_id': session_api_data.get('recipeId'),
|
||||
'session_operation_auth': {
|
||||
'session_operation_auth_by_signature': {
|
||||
'signature': session_api_data['signature'],
|
||||
'token': session_api_data['token'],
|
||||
'signature': session_api_data.get('signature'),
|
||||
'token': session_api_data.get('token'),
|
||||
}
|
||||
},
|
||||
'timing_constraint': 'unlimited'
|
||||
}
|
||||
}).encode())
|
||||
|
||||
resolution = video_quality.get('resolution', {})
|
||||
info_dict['url'] = session_response['data']['session']['content_uri']
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# get heartbeat info
|
||||
heartbeat_info_dict = {
|
||||
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||
'data': json.dumps(session_response['data']),
|
||||
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
||||
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
||||
'ping': ping
|
||||
}
|
||||
|
||||
return info_dict, heartbeat_info_dict
|
||||
|
||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||
def parse_format_id(id_code):
|
||||
mobj = re.match(r'''(?x)
|
||||
(?:archive_)?
|
||||
(?:(?P<codec>[^_]+)_)?
|
||||
(?:(?P<br>[\d]+)kbps_)?
|
||||
(?:(?P<res>[\d+]+)p_)?
|
||||
''', '%s_' % id_code)
|
||||
return mobj.groupdict() if mobj else {}
|
||||
|
||||
protocol = 'niconico_dmc'
|
||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||
vdict = parse_format_id(video_quality['id'])
|
||||
adict = parse_format_id(audio_quality['id'])
|
||||
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
||||
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
||||
|
||||
return {
|
||||
'url': session_response['data']['session']['content_uri'],
|
||||
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
||||
'format_id': format_id,
|
||||
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
|
||||
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
|
||||
'height': resolution.get('height'),
|
||||
'width': resolution.get('width'),
|
||||
'vcodec': vdict.get('codec'),
|
||||
'acodec': adict.get('codec'),
|
||||
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
||||
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
||||
'width': int_or_none(resolution.get('width')),
|
||||
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
|
||||
'protocol': protocol,
|
||||
'http_headers': {
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Get video webpage. We are not actually interested in it for normal
|
||||
# cases, but need the cookies in order to be able to download the
|
||||
# info webpage
|
||||
# Get video webpage for API data.
|
||||
webpage, handle = self._download_webpage_handle(
|
||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
if video_id.startswith('so'):
|
||||
@@ -284,86 +391,136 @@ class NiconicoIE(InfoExtractor):
|
||||
'data-api-data="([^"]+)"', webpage,
|
||||
'API data', default='{}'), video_id)
|
||||
|
||||
def _format_id_from_url(video_url):
|
||||
return 'economy' if video_real_url.endswith('low') else 'normal'
|
||||
def get_video_info_web(items):
|
||||
return dict_get(api_data['video'], items)
|
||||
|
||||
try:
|
||||
video_real_url = api_data['video']['smileInfo']['url']
|
||||
except KeyError: # Flash videos
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
# Get video info
|
||||
video_info_xml = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||
video_id, note='Downloading video info page')
|
||||
|
||||
flv_info = compat_parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
elif 'closed' in flv_info:
|
||||
raise ExtractorError('Niconico videos now require logging in',
|
||||
expected=True)
|
||||
elif 'error' in flv_info:
|
||||
raise ExtractorError('%s reports error: %s' % (
|
||||
self.IE_NAME, flv_info['error'][0]), expected=True)
|
||||
else:
|
||||
raise ExtractorError('Unable to find video URL')
|
||||
def get_video_info_xml(items):
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
for item in items:
|
||||
ret = xpath_text(video_info_xml, './/' + item)
|
||||
if ret:
|
||||
return ret
|
||||
|
||||
video_info_xml = self._download_xml(
|
||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||
video_id, note='Downloading video info page')
|
||||
if get_video_info_xml('error'):
|
||||
error_code = get_video_info_xml('code')
|
||||
|
||||
def get_video_info(items):
|
||||
if not isinstance(items, list):
|
||||
items = [items]
|
||||
for item in items:
|
||||
ret = xpath_text(video_info_xml, './/' + item)
|
||||
if ret:
|
||||
return ret
|
||||
if error_code == 'DELETED':
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
expected=True)
|
||||
elif error_code == 'NOT_FOUND':
|
||||
raise ExtractorError('The video is not found.',
|
||||
expected=True)
|
||||
elif error_code == 'COMMUNITY':
|
||||
self.to_screen('%s: The video is community members only.' % video_id)
|
||||
else:
|
||||
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
|
||||
|
||||
video_real_url = flv_info['url'][0]
|
||||
# Start extracting video formats
|
||||
formats = []
|
||||
|
||||
extension = get_video_info('movie_type')
|
||||
if not extension:
|
||||
extension = determine_ext(video_real_url)
|
||||
# Get HTML5 videos info
|
||||
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
||||
if not quality_info:
|
||||
raise ExtractorError('The video can\'t be downloaded', expected=True)
|
||||
|
||||
formats = [{
|
||||
'url': video_real_url,
|
||||
'ext': extension,
|
||||
'format_id': _format_id_from_url(video_real_url),
|
||||
}]
|
||||
else:
|
||||
formats = []
|
||||
for audio_quality in quality_info.get('audios') or {}:
|
||||
for video_quality in quality_info.get('videos') or {}:
|
||||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||
continue
|
||||
formats.append(self._extract_format_for_quality(
|
||||
api_data, video_id, audio_quality, video_quality))
|
||||
|
||||
dmc_info = api_data['video'].get('dmcInfo')
|
||||
if dmc_info: # "New" HTML5 videos
|
||||
quality_info = dmc_info['quality']
|
||||
for audio_quality in quality_info['audios']:
|
||||
for video_quality in quality_info['videos']:
|
||||
if not audio_quality['available'] or not video_quality['available']:
|
||||
continue
|
||||
formats.append(self._extract_format_for_quality(
|
||||
api_data, video_id, audio_quality, video_quality))
|
||||
# Get flv/swf info
|
||||
timestamp = None
|
||||
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
||||
if video_real_url:
|
||||
is_economy = video_real_url.endswith('low')
|
||||
|
||||
self._sort_formats(formats)
|
||||
else: # "Old" HTML5 videos
|
||||
formats = [{
|
||||
if is_economy:
|
||||
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||
|
||||
# Invoking ffprobe to determine resolution
|
||||
pp = FFmpegPostProcessor(self._downloader)
|
||||
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||
|
||||
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||
|
||||
try:
|
||||
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||
except PostProcessingError as err:
|
||||
raise ExtractorError(err.msg, expected=True)
|
||||
|
||||
v_stream = a_stream = {}
|
||||
|
||||
# Some complex swf files doesn't have video stream (e.g. nm4809023)
|
||||
for stream in metadata['streams']:
|
||||
if stream['codec_type'] == 'video':
|
||||
v_stream = stream
|
||||
elif stream['codec_type'] == 'audio':
|
||||
a_stream = stream
|
||||
|
||||
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||
filesize = int(
|
||||
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||
or metadata['format']['size']
|
||||
)
|
||||
extension = (
|
||||
get_video_info_xml('movie_type')
|
||||
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||
)
|
||||
|
||||
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
||||
timestamp = (
|
||||
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||
)
|
||||
metadata_timestamp = (
|
||||
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||
or timestamp if extension != 'mp4' else 0
|
||||
)
|
||||
|
||||
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||
|
||||
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||
|
||||
# If movie file size is unstable, old server movie is not source movie.
|
||||
if filesize > 1:
|
||||
formats.append({
|
||||
'url': video_real_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': _format_id_from_url(video_real_url),
|
||||
}]
|
||||
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||
'ext': extension,
|
||||
'container': extension,
|
||||
'vcodec': v_stream.get('codec_name'),
|
||||
'acodec': a_stream.get('codec_name'),
|
||||
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
|
||||
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||
'height': int_or_none(v_stream.get('height')),
|
||||
'width': int_or_none(v_stream.get('width')),
|
||||
'source_preference': 5 if not is_economy else -2,
|
||||
'quality': 5 if is_source and not is_economy else None,
|
||||
'filesize': filesize
|
||||
})
|
||||
|
||||
def get_video_info(items):
|
||||
return dict_get(api_data['video'], items)
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Start extracting information
|
||||
title = get_video_info('title')
|
||||
if not title:
|
||||
title = self._og_search_title(webpage, default=None)
|
||||
if not title:
|
||||
title = self._html_search_regex(
|
||||
title = (
|
||||
get_video_info_xml('title') # prefer to get the untranslated original title
|
||||
or get_video_info_web(['originalTitle', 'title'])
|
||||
or self._og_search_title(webpage, default=None)
|
||||
or self._html_search_regex(
|
||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||
webpage, 'video title')
|
||||
webpage, 'video title'))
|
||||
|
||||
watch_api_data_string = self._html_search_regex(
|
||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||
@@ -372,14 +529,15 @@ class NiconicoIE(InfoExtractor):
|
||||
video_detail = watch_api_data.get('videoDetail', {})
|
||||
|
||||
thumbnail = (
|
||||
get_video_info(['thumbnail_url', 'thumbnailURL'])
|
||||
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
|
||||
or dict_get( # choose highest from 720p to 240p
|
||||
get_video_info_web('thumbnail'),
|
||||
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
|
||||
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
||||
or video_detail.get('thumbnail'))
|
||||
|
||||
description = get_video_info('description')
|
||||
description = get_video_info_web('description')
|
||||
|
||||
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
|
||||
or unified_timestamp(get_video_info('postedDateTime')))
|
||||
if not timestamp:
|
||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||
if match:
|
||||
@@ -388,19 +546,25 @@ class NiconicoIE(InfoExtractor):
|
||||
timestamp = parse_iso8601(
|
||||
video_detail['postedAt'].replace('/', '-'),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
||||
|
||||
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
|
||||
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
||||
if not view_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||
webpage, 'view count', default=None)
|
||||
if match:
|
||||
view_count = int_or_none(match.replace(',', ''))
|
||||
view_count = view_count or video_detail.get('viewCount')
|
||||
view_count = (
|
||||
view_count
|
||||
or video_detail.get('viewCount')
|
||||
or try_get(api_data, lambda x: x['video']['count']['view']))
|
||||
|
||||
comment_count = (
|
||||
int_or_none(get_video_info_web('comment_num'))
|
||||
or video_detail.get('commentCount')
|
||||
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
||||
|
||||
comment_count = (int_or_none(get_video_info('comment_num'))
|
||||
or video_detail.get('commentCount')
|
||||
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
||||
if not comment_count:
|
||||
match = self._html_search_regex(
|
||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||
@@ -409,22 +573,41 @@ class NiconicoIE(InfoExtractor):
|
||||
comment_count = int_or_none(match.replace(',', ''))
|
||||
|
||||
duration = (parse_duration(
|
||||
get_video_info('length')
|
||||
get_video_info_web('length')
|
||||
or self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', default=None))
|
||||
or video_detail.get('length')
|
||||
or get_video_info('duration'))
|
||||
or get_video_info_web('duration'))
|
||||
|
||||
webpage_url = get_video_info('watch_url') or url
|
||||
webpage_url = get_video_info_web('watch_url') or url
|
||||
|
||||
# for channel movie and community movie
|
||||
channel_id = try_get(
|
||||
api_data,
|
||||
(lambda x: x['channel']['globalId'],
|
||||
lambda x: x['community']['globalId']))
|
||||
channel = try_get(
|
||||
api_data,
|
||||
(lambda x: x['channel']['name'],
|
||||
lambda x: x['community']['name']))
|
||||
|
||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||
# in the JSON, which will cause None to be returned instead of {}.
|
||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
|
||||
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
|
||||
uploader_id = str_or_none(
|
||||
get_video_info_web(['ch_id', 'user_id'])
|
||||
or owner.get('id')
|
||||
or channel_id
|
||||
)
|
||||
uploader = (
|
||||
get_video_info_web(['ch_name', 'user_nickname'])
|
||||
or owner.get('nickname')
|
||||
or channel
|
||||
)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'_api_data': api_data,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
@@ -432,6 +615,8 @@ class NiconicoIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'duration': duration,
|
||||
@@ -440,7 +625,7 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
|
||||
class NiconicoPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||
@@ -456,60 +641,185 @@ class NiconicoPlaylistIE(InfoExtractor):
|
||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _call_api(self, list_id, resource, query):
|
||||
return self._download_json(
|
||||
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||
'Downloading %s JSON metatdata' % resource, query=query,
|
||||
headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||
|
||||
def _parse_owner(self, item):
|
||||
owner = item.get('owner') or {}
|
||||
if owner:
|
||||
return {
|
||||
'uploader': owner.get('name'),
|
||||
'uploader_id': owner.get('id'),
|
||||
}
|
||||
return {}
|
||||
|
||||
def _fetch_page(self, list_id, page):
|
||||
page += 1
|
||||
items = self._call_api(list_id, 'page %d' % page, {
|
||||
'page': page,
|
||||
'pageSize': self._PAGE_SIZE,
|
||||
})['items']
|
||||
for item in items:
|
||||
video = item.get('video') or {}
|
||||
video_id = video.get('id')
|
||||
if not video_id:
|
||||
continue
|
||||
count = video.get('count') or {}
|
||||
get_count = lambda x: int_or_none(count.get(x))
|
||||
info = {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||
'description': video.get('shortDescription'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'view_count': get_count('view'),
|
||||
'comment_count': get_count('comment'),
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
}
|
||||
info.update(self._parse_owner(video))
|
||||
yield info
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0'
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
mylist = self._call_api(list_id, 'list', {
|
||||
'pageSize': 1,
|
||||
})
|
||||
entries = InAdvancePagedList(
|
||||
functools.partial(self._fetch_page, list_id),
|
||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||
self._PAGE_SIZE)
|
||||
result = self.playlist_result(
|
||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||
result.update(self._parse_owner(mylist))
|
||||
return result
|
||||
|
||||
def get_page_data(pagenum, pagesize):
|
||||
return self._download_json(
|
||||
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||
query={'page': 1 + pagenum, 'pageSize': pagesize},
|
||||
headers=self._API_HEADERS).get('data').get('mylist')
|
||||
|
||||
data = get_page_data(0, 1)
|
||||
title = data.get('name')
|
||||
description = data.get('description')
|
||||
uploader = data.get('owner').get('name')
|
||||
uploader_id = data.get('owner').get('id')
|
||||
|
||||
def pagefunc(pagenum):
|
||||
data = get_page_data(pagenum, 25)
|
||||
return ({
|
||||
'_type': 'url',
|
||||
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
|
||||
} for item in data.get('items'))
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': list_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'entries': OnDemandPagedList(pagefunc, 25),
|
||||
}
|
||||
|
||||
|
||||
class NicovideoSearchBaseIE(InfoExtractor):
    """Shared result-page crawling for the Niconico search extractors."""

    # No upper bound on the number of results a search may request
    _MAX_RESULTS = float('inf')

    def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
        """Yield a url_result for each video ID found on the search pages.

        url: search page URL to download.
        item_id: ID used for progress/error reporting.
        query: optional dict of URL query parameters. If it contains
            'page', only that single page is fetched; otherwise pages are
            fetched from 1 upwards until a page yields no video IDs.
        note: progress message template; '%(page)s' is replaced by the page
            number. When None, the downloader's own default note is used.
        """
        # Work on a private copy so the caller's dict is never modified
        # while this generator is being consumed.
        query = dict(query or {})
        pages = [query['page']] if 'page' in query else itertools.count(1)
        for page_num in pages:
            query['page'] = str(page_num)
            # Guard the formatting: `None % {...}` would raise TypeError
            page_note = note % {'page': page_num} if note is not None else None
            webpage = self._download_webpage(url, item_id, query=query, note=page_note)
            results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.+?)(?=["\'])', webpage)
            for item in results:
                yield self.url_result('http://www.nicovideo.jp/watch/%s' % item, 'Niconico', item)
            if not results:
                # A page without any video IDs marks the end of the results
                break

    def _get_n_results(self, query, n):
        """Return a playlist holding (at most) the first n search results."""
        entries = self._entries(self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
        if n < self._MAX_RESULTS:
            entries = itertools.islice(entries, 0, n)
        return self.playlist_result(entries, query, query)
|
||||
|
||||
|
||||
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Keyword search on nicovideo.jp (search key 'nicosearch')."""

    IE_DESC = 'Nico video search'
    IE_NAME = 'nicovideo:search'
    _SEARCH_KEY = 'nicosearch'

    def _search_results(self, query):
        """Return the generator of result entries for the search term."""
        search_url = self._proto_relative_url('//www.nicovideo.jp/search/%s' % query)
        return self._entries(search_url, query)
|
||||
|
||||
|
||||
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
    """Extract the results of a nicovideo.jp search page given by URL."""

    IE_NAME = '%s_url' % NicovideoSearchIE.IE_NAME
    IE_DESC = 'Nico video search URLs'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
    _TESTS = [{
        'url': 'http://www.nicovideo.jp/search/sm9',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_mincount': 40,
    }, {
        'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_count': 31,
    }]

    def _real_extract(self, url):
        """Build a playlist whose ID and title are both the search term."""
        search_term = self._match_id(url)
        found = self._entries(url, search_term)
        return self.playlist_result(found, search_term, search_term)
|
||||
|
||||
|
||||
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Niconico search ordered newest first, crawled by date interval.

    A date interval is queried directly when page _MAX_PAGES of its results
    is not full; otherwise the interval is split in two and each part is
    processed recursively.
    """

    IE_DESC = 'Nico video search, newest first'
    IE_NAME = '%s:date' % NicovideoSearchIE.IE_NAME
    _SEARCH_KEY = 'nicosearchdate'

    _TESTS = [{
        'url': 'nicosearchdateall:a',
        'info_dict': {
            'id': 'a',
            'title': 'a'
        },
        'playlist_mincount': 1610,
    }]

    _START_DATE = datetime.date(2007, 1, 1)
    _RESULTS_PER_PAGE = 32
    _MAX_PAGES = 50

    def _entries(self, url, item_id, start_date=None, end_date=None):
        """Yield search results uploaded between start_date and end_date.

        start_date defaults to _START_DATE and end_date to today's date.
        """
        start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()

        # If the last page has a full page of videos, we need to break down the query interval further
        last_page_len = len(list(self._get_entries_for_date(
            url, item_id, start_date, end_date, self._MAX_PAGES,
            note='Checking number of videos from {0} to {1}'.format(start_date, end_date))))
        if last_page_len == self._RESULTS_PER_PAGE and start_date != end_date:
            midpoint = start_date + ((end_date - start_date) // 2)
            if midpoint == start_date:
                # Adjacent days: half a day added to a date leaves it
                # unchanged, so recursing on (midpoint, end_date) would
                # repeat this very call until the recursion limit is hit.
                # Query each of the two days on its own instead.
                intervals = ((end_date, end_date), (start_date, start_date))
            else:
                # NOTE(review): the midpoint date belongs to both halves;
                # if the site treats 'start'/'end' as inclusive, results
                # from that day may be yielded twice - confirm.
                intervals = ((midpoint, end_date), (start_date, midpoint))
            # Newer interval first, to keep the newest-first ordering
            for interval_start, interval_end in intervals:
                for entry in self._entries(url, item_id, interval_start, interval_end):
                    yield entry
        else:
            self.to_screen('{0}: Downloading results from {1} to {2}'.format(item_id, start_date, end_date))
            for entry in self._get_entries_for_date(
                    url, item_id, start_date, end_date, note='    Downloading page %(page)s'):
                yield entry

    def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
        """Yield results for one date interval via the base class crawler.

        end_date defaults to start_date. When page_num is given, only that
        result page is fetched.
        """
        query = {
            'start': compat_str(start_date),
            'end': compat_str(end_date or start_date),
            'sort': 'f',
            'order': 'd',
        }
        if page_num:
            query['page'] = compat_str(page_num)

        for entry in super(NicovideoSearchDateIE, self)._entries(url, item_id, query=query, note=note):
            yield entry
|
||||
|
||||
|
||||
class NiconicoUserIE(InfoExtractor):
    """Extract all videos uploaded by a nicovideo.jp user as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }
    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _entries(self, list_id):
        """Yield a url_result for each video listed by the user videos API.

        Pages are requested until the total reported on the first page has
        been yielded, or until a page comes back without items.
        """
        # Unknown until the first page has been read
        total_count = None
        count = page_num = 0
        while total_count is None or count < total_count:
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
                headers=self._API_HEADERS,
                # Report the page number that is actually being requested
                note='Downloading JSON metadata%s' % (' page %d' % (page_num + 1) if page_num else ''))
            data = json_parsed['data']
            if not page_num:
                # May be None when the field is missing or not numeric; in
                # that case paging continues until an empty page is seen.
                total_count = int_or_none(data.get('totalCount'))
            items = data['items']
            if not items:
                # Without this, a reported total larger than the number of
                # items actually served would loop forever.
                break
            for entry in items:
                count += 1
                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
            page_num += 1

    def _real_extract(self, url):
        """Return a playlist of the user's videos, keyed by the user ID."""
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
@@ -60,8 +60,7 @@ class NRKBaseIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
urljoin('https://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query,
|
||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||
fatal=fatal, query=query)
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
|
@@ -1,71 +1,113 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class NuvidIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://m.nuvid.com/video/1310741/',
|
||||
'md5': 'eab207b7ac4fccfb4e23c86201f11277',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nuvid.com/video/6513023/italian-babe',
|
||||
'md5': '772d2f8288f3d3c5c45f7a41761c7844',
|
||||
'info_dict': {
|
||||
'id': '1310741',
|
||||
'id': '6513023',
|
||||
'ext': 'mp4',
|
||||
'title': 'Horny babes show their awesome bodeis and',
|
||||
'duration': 129,
|
||||
'title': 'italian babe',
|
||||
'format_id': '360p',
|
||||
'duration': 321.0,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'thumbnails': list,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://m.nuvid.com/video/6523263',
|
||||
'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52',
|
||||
'info_dict': {
|
||||
'id': '6523263',
|
||||
'ext': 'mp4',
|
||||
'title': 'Slut brunette college student anal dorm',
|
||||
'format_id': '720p',
|
||||
'duration': 421.0,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'thumbnails': list,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://m.nuvid.com/video/6415801/',
|
||||
'md5': '638d5ececb138d5753593f751ae3f697',
|
||||
'info_dict': {
|
||||
'id': '6415801',
|
||||
'ext': 'mp4',
|
||||
'title': 'My best friend wanted to fuck my wife for a long time',
|
||||
'format_id': '720p',
|
||||
'duration': 1882,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'thumbnails': list,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
page_url = 'http://m.nuvid.com/video/%s' % video_id
|
||||
qualities = {
|
||||
'lq': '360p',
|
||||
'hq': '720p',
|
||||
}
|
||||
|
||||
json_url = 'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'.format(**locals())
|
||||
video_data = self._download_json(
|
||||
json_url, video_id, headers={
|
||||
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
|
||||
}) or {}
|
||||
|
||||
# nice to have, not required
|
||||
webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading video page')
|
||||
# When dwnld_speed exists and has a value larger than the MP4 file's
|
||||
# bitrate, Nuvid returns the MP4 URL
|
||||
# It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm
|
||||
self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
|
||||
mp4_webpage = self._download_webpage(
|
||||
page_url, video_id, 'Downloading video page for MP4 format')
|
||||
'http://m.nuvid.com/video/%s' % (video_id, ),
|
||||
video_id, 'Downloading video page', fatal=False) or ''
|
||||
|
||||
title = (
|
||||
try_get(video_data, lambda x: x['title'], compat_str)
|
||||
or self._html_search_regex(
|
||||
(r'''<span\s[^>]*?\btitle\s*=\s*(?P<q>"|'|\b)(?P<title>[^"]+)(?P=q)\s*>''',
|
||||
r'''<div\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)thumb-holder video(?P=q)>\s*<h5\b[^>]*>(?P<title>[^<]+)</h5''',
|
||||
r'''<span\s[^>]*?\bclass\s*=\s*(?P<q>"|'|\b)title_thumb(?P=q)>(?P<title>[^<]+)</span'''),
|
||||
webpage, 'title', group='title')).strip()
|
||||
|
||||
html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
|
||||
video_url = self._html_search_regex(html5_video_re, webpage, video_id)
|
||||
mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
}]
|
||||
if mp4_video_url != video_url:
|
||||
formats.append({
|
||||
'url': mp4_video_url,
|
||||
})
|
||||
'url': source,
|
||||
'format_id': qualities.get(quality),
|
||||
'height': int_or_none(qualities.get(quality)[:-1]),
|
||||
} for quality, source in video_data.get('files').items() if source]
|
||||
|
||||
title = self._html_search_regex(
|
||||
[r'<span title="([^"]+)">',
|
||||
r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
|
||||
r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))
|
||||
thumbnails = [
|
||||
{
|
||||
'url': thumb_url,
|
||||
} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
|
||||
{'url': thumb_url, }
|
||||
for thumb_url in (
|
||||
url_or_none(src) for src in re.findall(
|
||||
r'<div\s+class\s*=\s*"video-tmb-wrap"\s*>\s*<img\s+src\s*=\s*"([^"]+)"\s*/>',
|
||||
webpage))
|
||||
]
|
||||
thumbnail = thumbnails[0]['url'] if thumbnails else None
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
[r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
|
||||
r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'thumbnail': url_or_none(video_data.get('poster')),
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
get_exe_version,
|
||||
is_outdated_version,
|
||||
process_communicate_or_kill,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
@@ -226,7 +227,7 @@ class PhantomJSwrapper(object):
|
||||
self.exe, '--ssl-protocol=any',
|
||||
self._TMP_FILES['script'].name
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = p.communicate()
|
||||
out, err = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
raise ExtractorError(
|
||||
'Executing JS failed\n:' + encodeArgument(err))
|
||||
|
193
youtube_dl/extractor/peekvids.py
Normal file
193
youtube_dl/extractor/peekvids.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PeekVidsIE(InfoExtractor):
    """Extractor for peekvids.com video and embed pages."""

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?peekvids\.com/
        (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
        (?P<id>[^/?&#]*)
    '''
    _TESTS = [{
        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
        'info_dict': {
            'id': '1262717',
            'display_id': 'BSyLMbN0YCd',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1642579329,
            'upload_date': '20220119',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }]
    # Host queried for the list of source files; overridden by subclasses
    _DOMAIN = 'www.peekvids.com'

    def _get_detail(self, html):
        """Return the inner HTML of the page's detail block, or None."""
        return get_element_by_class('detail-video-block', html)

    def _real_extract(self, url):
        """Extract formats and metadata for the video page at url.

        Raises ExtractorError (expected) when the site answers with its
        rate-limit page.
        """
        video_id = self._match_id(url)
        # Accept HTTP 429 so that the rate-limit page can be reported clearly
        webpage = self._download_webpage(url, video_id, expected_status=429)
        if '>Rate Limit Exceeded' in webpage:
            raise ExtractorError(
                '[%s] %s: %s' % (self.IE_NAME, video_id, 'You are suspected as a bot. Wait, or pass the captcha test on the site and provide --cookies.'),
                expected=True)

        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

        # The ID from the URL becomes the display ID; the ID from the
        # <video> element's data-id attribute is used from here on.
        display_id = video_id
        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
        srcs = self._download_json(
            'https://%s/v-alt/%s' % (self._DOMAIN, video_id), video_id,
            note='Downloading list of source files')

        # Keys of the form data-src<digits> carry the format height
        formats = []
        for src_key, src_value in srcs.items():
            f_match = re.match(r'^data-src(\d{3,})$', src_key)
            f_url = url_or_none(src_value)
            if not f_match or not f_url:
                continue
            f_id = f_match.group(1)
            formats.append({
                'url': f_url,
                'format_id': f_id,
                'height': int_or_none(f_id),
            })

        if not formats:
            # Do not reuse the name `url` for the loop variable: on Python 2
            # list comprehension variables leak into the enclosing scope and
            # would overwrite the page URL needed below.
            formats = [{'url': src_url} for src_url in srcs.values()]
        self._sort_formats(formats)

        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        info.pop('url', None)
        # may not have found the thumbnail if it was in a list in the ld+json
        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
        detail = self._get_detail(webpage) or ''
        # `or ''` also covers a description that is present but None
        info['description'] = self._html_search_regex(
            r'(?s)(.+?)(?:%s\s*<|<ul\b)' % (re.escape(info.get('description') or ''), ),
            detail, 'description', default=None) or None
        # Drop the last ',' or '-' separated suffix from the title
        info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)

        def cat_tags(name, html):
            """Return the whitespace-separated words listed after '<name>:'."""
            listed = self._html_search_regex(
                r'(?s)<span\b[^>]*>\s*%s\s*:\s*</span>(.+?)</li>' % (re.escape(name), ),
                html, name, default='')
            return [word for word in re.split(r'\s+', listed) if word]

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'age_limit': 18,
            'formats': formats,
            'categories': cat_tags('Categories', detail),
            'tags': cat_tags('Tags', detail),
            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
        }, info)
|
||||
|
||||
|
||||
class PlayVidsIE(PeekVidsIE):
    """Extractor for playvids.com; reuses the PeekVidsIE extraction logic.

    Only the URL pattern, the source-list host and the CSS class of the
    detail block differ from the parent class.
    """

    _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
    _TESTS = [{
        'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'md5': '2f12e50213dd65f142175da633c4564c',
        'info_dict': {
            'id': '1978030',
            'display_id': 'U3pBrYhsjXM',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1640435839,
            'upload_date': '20211225',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
        'md5': 'e783986e596cafbf46411a174ab42ba6',
        'info_dict': {
            'id': '762385',
            'display_id': 'bKmGLe3IwjZ',
            'ext': 'mp4',
            'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
            'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
            'timestamp': 1516958544,
            'upload_date': '20180126',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 480,
            'uploader': 'Brazzers',
            # 'age_limit' was listed twice in this dict; one entry suffices
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/v/47iUho33toY',
        'md5': 'b056b5049d34b648c1e86497cf4febce',
        'info_dict': {
            'id': '700621',
            'display_id': '47iUho33toY',
            'ext': 'mp4',
            'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
            'description': None,
            'timestamp': 1507052209,
            'upload_date': '20171003',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 332,
            'uploader': 'Cacerenele',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        }
    }, {
        'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
        'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
        'info_dict': {
            'id': '1523518',
            'display_id': 'z3_7iwWCmqt',
            'ext': 'mp4',
            'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
            'description': None,
            'timestamp': 1607470323,
            'upload_date': '20201208',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 593,
            'uploader': 'yorours',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }]
    _DOMAIN = 'www.playvids.com'

    def _get_detail(self, html):
        """Return the inner HTML of the page's detail block, or None."""
        return get_element_by_class('detail-block', html)
|
105
youtube_dl/extractor/pr0gramm.py
Normal file
105
youtube_dl/extractor/pr0gramm.py
Normal file
@@ -0,0 +1,105 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
import re
|
||||
from ..utils import (
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
class Pr0grammStaticIE(InfoExtractor):
    """Extractor for the static pr0gramm.com pages with a plain <video> tag."""

    # Possible urls:
    # https://pr0gramm.com/static/5466437
    _VALID_URL = r'https?://pr0gramm\.com/static/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://pr0gramm.com/static/5466437',
        'md5': '52fa540d70d3edc286846f8ca85938aa',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    def _real_extract(self, url):
        """Extract the media formats, uploader and upload date of a post."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Fetch media sources
        entries = self._parse_html5_media_entries(url, webpage, video_id)
        # Fall back to an empty entry so that a page without any media is
        # reported by the format check below instead of a bare IndexError
        media_info = entries[0] if entries else {}

        # this raises if there are no formats
        self._sort_formats(media_info.get('formats') or [])

        # Fetch author
        uploader = self._html_search_regex(r'by\W+([\w-]+)\W+', webpage, 'uploader')

        # Fetch approx upload timestamp from filename
        # (//img.pr0gramm.com/2022/12/21/62ae8aa5e2da0ebf.mp4)
        # Stays None in case the extraction fails
        upload_date = None
        m = re.search(r'//img\.pr0gramm\.com/(?P<year>[\d]+)/(?P<mon>[\d]+)/(?P<day>[\d]+)/\w+\.\w{,4}', webpage)
        if m:
            # Up to a day of accuracy should suffice...
            upload_date = m.group('year') + m.group('mon') + m.group('day')

        return merge_dicts({
            'id': video_id,
            'title': 'pr0gramm-%s%s' % (video_id, (' by ' + uploader) if uploader else ''),
            'uploader': uploader,
            'upload_date': upload_date
        }, media_info)
|
||||
|
||||
|
||||
# This extractor is for the primary url (used for sharing, and appears in the
|
||||
# location bar). Since this page loads the DOM via JS, yt-dl can't find any
|
||||
# video information here. So let's redirect to a compatibility version of
|
||||
# the site, which does contain the <video>-element by itself, without requiring
|
||||
# JS to be run.
|
||||
class Pr0grammIE(InfoExtractor):
    """Redirect regular pr0gramm.com post URLs to Pr0grammStaticIE."""

    # Possible urls:
    # https://pr0gramm.com/new/546637
    # https://pr0gramm.com/new/video/546637
    # https://pr0gramm.com/top/546637
    # https://pr0gramm.com/top/video/546637
    # https://pr0gramm.com/user/g11st/uploads/5466437
    # https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290
    # https://pr0gramm.com/user/froschler/reinziehen-1elf/5232030
    # https://pr0gramm.com/user/froschler/1elf/5232030
    # https://pr0gramm.com/new/5495710:comment62621020 <- this is not the id!
    # https://pr0gramm.com/top/fruher war alles damals/5498175

    _VALID_URL = r'https?:\/\/pr0gramm\.com\/(?!static/\d+).+?\/(?P<id>[\d]+)(:|$)'
    _TEST = {
        'url': 'https://pr0gramm.com/new/video/5466437',
        'info_dict': {
            'id': '5466437',
            'ext': 'mp4',
            'title': 'pr0gramm-5466437 by g11st',
            'uploader': 'g11st',
            'upload_date': '20221221',
        }
    }

    def _generic_title(self, *args, **kwargs):
        """Return a fixed placeholder title.

        The method previously took no parameters at all, so calling it on
        an instance raised TypeError; it now accepts and ignores any
        arguments.
        NOTE(review): this looks like leftover placeholder code and is not
        used by this class's _real_extract - consider removing it.
        """
        return "oof"

    def _real_extract(self, url):
        """Delegate to the static page of the post, which Pr0grammStaticIE handles."""
        video_id = self._match_id(url)

        return self.url_result(
            'https://pr0gramm.com/static/' + video_id,
            video_id=video_id,
            ie=Pr0grammStaticIE.ie_key())
|
@@ -5,15 +5,16 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
GeoRestrictedError,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
@@ -96,12 +97,100 @@ class RaiBaseIE(InfoExtractor):
|
||||
if not formats and geoprotection is True:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
formats.extend(self._create_http_urls(relinker_url, formats))
|
||||
|
||||
return dict((k, v) for k, v in {
|
||||
'is_live': is_live,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}.items() if v is not None)
|
||||
|
||||
def _create_http_urls(self, relinker_url, fmts):
    """Build direct HTTPS MP4 formats for a relinker URL.

    The relinker is probed with HEAD requests (User-Agent 'Rai'); when it
    redirects to an MP4 location, one format per advertised quality is
    produced. Metadata for each format is copied from a format in `fmts`
    with a similar bitrate when one exists, otherwise taken from the
    static `_QUALITY` table.

    relinker_url -- the relinker URL to probe
    fmts -- list of already extracted format dicts (only read)

    Returns a list of format dicts; an empty list when the probe does not
    redirect or the resolved URL does not match `_RELINKER_REG`.
    """
    _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
    _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
    _QUALITY = {
        # tbr: w, h
        '250': [352, 198],
        '400': [512, 288],
        '700': [512, 288],
        '800': [700, 394],
        '1200': [736, 414],
        '1800': [1024, 576],
        '2400': [1280, 720],
        '3200': [1440, 810],
        '3600': [1440, 810],
        '5000': [1920, 1080],
        '10000': [1920, 1080],
    }

    def test_url(url):
        # Returns the redirect target for a 200 response whose final URL
        # differs from `url`, False when the request failed or did not
        # redirect, and None for any other status code.
        resp = self._request_webpage(
            HEADRequest(url), None, headers={'User-Agent': 'Rai'},
            fatal=False, errnote=False, note=False)

        if resp is False:
            return False

        if resp.code == 200:
            return False if resp.url == url else resp.url
        return None

    def get_format_info(tbr):
        br = int_or_none(tbr)
        if len(fmts) == 1 and not br:
            br = fmts[0].get('tbr')
        # `br` is still None for the '*' quality when several formats are
        # known; comparing/dividing None would raise, so guard on it.
        if br and br > 300:
            # floor division + int() gives an integer string on both
            # Python 2 and 3 (math.floor returns a float on Python 2)
            tbr = compat_str(int(br // 100) * 100)
        else:
            tbr = '250'

        # try extracting info from available m3u8 formats
        format_copy = None
        if br:
            br_limit = int(br // 100)
            for f in fmts:
                if f.get('tbr'):
                    # accept a known format within +/- one 100 kbps bucket;
                    # the last match wins, as before
                    if br_limit - 1 <= int(f['tbr'] // 100) <= br_limit + 1:
                        format_copy = f.copy()

        if format_copy:
            return {
                'width': format_copy.get('width'),
                'height': format_copy.get('height'),
                'tbr': format_copy.get('tbr'),
                'vcodec': format_copy.get('vcodec'),
                'acodec': format_copy.get('acodec'),
                'fps': format_copy.get('fps'),
                'format_id': 'https-%s' % tbr,
            }

        # Bitrates missing from the table yield unknown dimensions instead
        # of a KeyError.
        width, height = _QUALITY.get(tbr, (None, None))
        return {
            'width': width,
            'height': height,
            'format_id': 'https-%s' % tbr,
            'tbr': int(tbr),
        }

    loc = test_url(_MP4_TMPL % (relinker_url, '*'))
    if not isinstance(loc, compat_str):
        return []

    mobj = re.match(
        _RELINKER_REG,
        test_url(relinker_url) or '')
    if not mobj:
        return []

    available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
    available_qualities = [i for i in available_qualities if i]

    formats = []
    for q in available_qualities:
        fmt = {
            'url': _MP4_TMPL % (relinker_url, q),
            'protocol': 'https',
            'ext': 'mp4',
        }
        fmt.update(get_format_info(q))
        formats.append(fmt)
    return formats
|
||||
|
||||
@staticmethod
|
||||
def _extract_subtitles(url, video_data):
|
||||
STL_EXT = 'stl'
|
||||
@@ -151,6 +240,22 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# 1080p direct mp4 url
|
||||
'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
|
||||
'md5': '2e501e8651d72f05ffe8f5d286ad560b',
|
||||
'info_dict': {
|
||||
'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
|
||||
'ext': 'mp4',
|
||||
'title': 'Leonardo - S1E1',
|
||||
'alt_title': 'St 1 Ep 1 - Episodio 1',
|
||||
'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Rai 1',
|
||||
'duration': 3229,
|
||||
'series': 'Leonardo',
|
||||
'season': 'Season 1',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
'only_matching': True,
|
||||
@@ -158,6 +263,10 @@ class RaiPlayIE(RaiBaseIE):
|
||||
# subtitles at 'subtitlesArray' key (see #27698)
|
||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# DRM protected
|
||||
'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -166,6 +275,13 @@ class RaiPlayIE(RaiBaseIE):
|
||||
media = self._download_json(
|
||||
base + '.json', video_id, 'Downloading video JSON')
|
||||
|
||||
if try_get(
|
||||
media,
|
||||
(lambda x: x['rights_management']['rights']['drm'],
|
||||
lambda x: x['program_info']['rights_management']['rights']['drm']),
|
||||
dict):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
title = media['name']
|
||||
|
||||
video = media['video']
|
||||
@@ -307,7 +423,7 @@ class RaiIE(RaiBaseIE):
|
||||
}, {
|
||||
# with ContentItem in og:url
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
||||
'md5': '06345bd97c932f19ffb129973d07a020',
|
||||
'info_dict': {
|
||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||
'ext': 'mp4',
|
||||
|
97
youtube_dl/extractor/rbgtum.py
Normal file
97
youtube_dl/extractor/rbgtum.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class RbgTumIE(InfoExtractor):
    # Single lecture recordings on live.rbg.tum.de (/w/<course>/<id>[/<view>])
    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        }
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        }
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
    }, ]

    def _real_extract(self, url):
        """Scrape the lecture page for its HLS manifest, title and series."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # All three values are scraped before any manifest is downloaded
        manifest_url = self._html_search_regex(
            r'(https://.+?\.m3u8)', page, 'm3u8')
        title = self._html_search_regex(
            r'(?si)<h1.*?>(.*)</h1>', page, 'title')
        series = self._html_search_regex(
            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>',
            page, 'series')

        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        info = {'id': video_id}
        info['title'] = title
        info['series'] = series
        info['formats'] = formats
        return info
|
||||
|
||||
|
||||
class RbgTumCourseIE(InfoExtractor):
    # Course overview pages on live.rbg.tum.de, returned as a playlist of
    # the lecture pages linked from them
    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, ]

    def _real_extract(self, url):
        """Collect every /w/ lecture link of the course page into a playlist."""
        course_id = self._match_id(url)
        page = self._download_webpage(url, course_id)

        series_title = self._html_search_regex(
            r'(?si)<h1.*?>(.*)</h1>', page, 'title')

        # The look-behinds drop links ending in /cam, /pres or /chat
        lecture_paths = re.findall(
            r'(?i)href="/w/(.+)(?<!/cam)(?<!/pres)(?<!/chat)"', page)
        entries = [
            self.url_result(
                'https://live.rbg.tum.de/w/' + path, ie=RbgTumIE.ie_key())
            for path in lecture_paths]

        return self.playlist_result(entries, course_id, series_title)
|
@@ -6,7 +6,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none
|
||||
int_or_none,
|
||||
str_to_int
|
||||
)
|
||||
|
||||
|
||||
@@ -179,7 +180,7 @@ class RUTVIE(InfoExtractor):
|
||||
'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22',
|
||||
'rtmp_live': True,
|
||||
'ext': 'flv',
|
||||
'vbr': int(quality),
|
||||
'vbr': str_to_int(quality),
|
||||
'preference': preference,
|
||||
}
|
||||
elif transport == 'm3u8':
|
||||
|
@@ -1,105 +1,126 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
merge_dicts,
|
||||
parse_codecs,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
def _get_api_key(api_path):
    """Return the hex MD5 digest used as API password for `api_path`.

    The digest covers a fixed key, the path (with one trailing '?' removed)
    and the current Unix time expressed in rounded days.
    """
    path = api_path[:-1] if api_path.endswith('?') else api_path

    secret = 'fb5f58a820353bd7095de526253c14fd'
    day_number = int(round(time.time() / 24 / 3600))
    payload = '{0:}{1:}{2:}'.format(secret, path, day_number)
    return hashlib.md5(payload.encode('ascii')).hexdigest()
|
||||
|
||||
|
||||
class StreamCZIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)'
|
||||
_API_URL = 'http://www.stream.cz/API'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
|
||||
'md5': '934bb6a6d220d99c010783c9719960d5',
|
||||
'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
|
||||
'md5': '40c41ade1464a390a0b447e333df4239',
|
||||
'info_dict': {
|
||||
'id': '765767',
|
||||
'id': '57953890',
|
||||
'ext': 'mp4',
|
||||
'title': 'Peklo na talíři: Éčka pro děti',
|
||||
'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE',
|
||||
'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
|
||||
'duration': 256,
|
||||
},
|
||||
'title': 'Bůh',
|
||||
'display_id': 'buh',
|
||||
'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
|
||||
'duration': 1369.6,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka',
|
||||
'md5': '849a88c1e1ca47d41403c2ba5e59e261',
|
||||
'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
|
||||
'md5': '41fd358000086a1ccdb068c77809b158',
|
||||
'info_dict': {
|
||||
'id': '10002447',
|
||||
'id': '64087937',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kancelář Blaník: Tři roky pro Mazánka',
|
||||
'description': 'md5:3862a00ba7bf0b3e44806b544032c859',
|
||||
'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000',
|
||||
'duration': 368,
|
||||
},
|
||||
'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
|
||||
'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
|
||||
'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
|
||||
'duration': 50.2,
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
|
||||
'md5': '3ee4d0be040e8f4a543e67e509d55e3f',
|
||||
'info_dict': {
|
||||
'id': '64147267',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
|
||||
'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
|
||||
'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
|
||||
'duration': 442.84,
|
||||
'view_count': int,
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_formats(self, spl_url, video):
    """Yield format dicts for the streams described by `video`.

    spl_url -- URL of the playlist; relative stream URLs are resolved
               against it
    video -- dict read for 'http_stream' -> 'qualities' (yielded as 'ts',
             source preference -1) and 'mp4' (yielded as 'mp4', source
             preference 1); each maps a format id to a stream dict

    Streams without a 'url' are skipped. Missing or null stream groups are
    treated as empty instead of raising.
    """
    for ext, pref, streams in (
            ('ts', -1, (video.get('http_stream') or {}).get('qualities') or {}),
            ('mp4', 1, video.get('mp4') or {})):
        for format_id, stream in streams.items():
            if not stream.get('url'):
                continue
            # a null 'resolution' is handled like a missing one
            resolution = stream.get('resolution') or 2 * [0]
            yield merge_dicts({
                'format_id': '-'.join((format_id, ext)),
                'ext': ext,
                'source_preference': pref,
                'url': urljoin(spl_url, stream['url']),
                'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
                'duration': float_or_none(stream.get('duration'), scale=1000),
                'width': resolution[0] or None,
                # fall back to the height encoded in ids such as '720p'
                'height': resolution[1] or int_or_none(format_id.replace('p', '')),
            }, parse_codecs(stream.get('codec')))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
api_path = '/episode/%s' % video_id
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
req = sanitized_Request(self._API_URL + api_path)
|
||||
req.add_header('Api-Password', _get_api_key(api_path))
|
||||
data = self._download_json(req, video_id)
|
||||
data = self._download_json(
|
||||
'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',
|
||||
data=json.dumps({
|
||||
'variables': {'urlName': video_id},
|
||||
'query': '''
|
||||
query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } }
|
||||
fragment VideoDetailFragmentOnEpisode on Episode {
|
||||
id
|
||||
spl
|
||||
urlName
|
||||
name
|
||||
perex
|
||||
duration
|
||||
views
|
||||
}'''
|
||||
}).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=UTF-8'}
|
||||
)['data']['episode']
|
||||
|
||||
formats = []
|
||||
for quality, video in enumerate(data['video_qualities']):
|
||||
for f in video['formats']:
|
||||
typ = f['type'].partition('/')[2]
|
||||
qlabel = video.get('quality_label')
|
||||
formats.append({
|
||||
'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ,
|
||||
'format_id': '%s-%s' % (typ, f['quality']),
|
||||
'url': f['source'],
|
||||
'height': int_or_none(f['quality'].rstrip('p')),
|
||||
'quality': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
image = data.get('image')
|
||||
if image:
|
||||
thumbnail = self._proto_relative_url(
|
||||
image.replace('{width}', '1240').replace('{height}', '697'),
|
||||
scheme='http:',
|
||||
)
|
||||
else:
|
||||
thumbnail = None
|
||||
|
||||
stream = data.get('_embedded', {}).get('stream:show', {}).get('name')
|
||||
if stream:
|
||||
title = '%s: %s' % (stream, data['name'])
|
||||
else:
|
||||
title = data['name']
|
||||
spl_url = data['spl'] + 'spl2,3'
|
||||
metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
|
||||
if 'Location' in metadata and 'data' not in metadata:
|
||||
spl_url = metadata['Location']
|
||||
metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist')
|
||||
video = metadata['data']
|
||||
|
||||
subtitles = {}
|
||||
srt_url = data.get('subtitles_srt')
|
||||
if srt_url:
|
||||
subtitles['cs'] = [{
|
||||
'ext': 'srt',
|
||||
'url': srt_url,
|
||||
}]
|
||||
for subs in video.get('subtitles', {}).values():
|
||||
if not subs.get('language'):
|
||||
continue
|
||||
for ext, sub_url in subs.get('urls').items():
|
||||
subtitles.setdefault(subs['language'], []).append({
|
||||
'ext': ext,
|
||||
'url': urljoin(spl_url, sub_url)
|
||||
})
|
||||
|
||||
formats = list(self._extract_formats(spl_url, video))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'description': data.get('web_site_text'),
|
||||
'duration': int_or_none(data.get('duration')),
|
||||
'display_id': display_id,
|
||||
'title': data.get('name'),
|
||||
'description': data.get('perex'),
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
'view_count': int_or_none(data.get('views')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
61
youtube_dl/extractor/streamsb.py
Normal file
61
youtube_dl/extractor/streamsb.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import binascii
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urljoin, url_basename
|
||||
|
||||
|
||||
def to_ascii_hex(str1):
    """Return the lowercase hex representation of `str1` encoded as UTF-8."""
    raw = str1.encode('utf-8')
    hex_bytes = binascii.hexlify(raw)
    return hex_bytes.decode('ascii')
|
||||
|
||||
|
||||
def generate_random_string(length):
    """Return `length` random characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
|
||||
|
||||
|
||||
class StreamsbIE(InfoExtractor):
    _DOMAINS = ('viewsb.com', )
    # Domains are escaped so that '.' only matches a literal dot
    _VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(map(re.escape, _DOMAINS))
    _TEST = {
        'url': 'https://viewsb.com/dxfvlu4qanjx',
        'md5': '488d111a63415369bf90ea83adc8a325',
        'info_dict': {
            'id': 'dxfvlu4qanjx',
            'ext': 'mp4',
            'title': 'Sintel'
        }
    }

    def _real_extract(self, url):
        """Resolve the player iframe and query the sources endpoint.

        The sources request path is the hex encoding of
        '<random>||<video code>||<random>||streamsb'; the JSON answer
        provides the title and the HLS manifest URL.
        """
        domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
        webpage = self._download_webpage(url, video_id)

        iframe_rel_url = self._search_regex(r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''', webpage, 'iframe', group='path')
        iframe_url = urljoin('https://' + domain, iframe_rel_url)

        iframe_data = self._download_webpage(iframe_url, video_id)
        # the number in app.min.<n>.js is used in the sources path; '50' is
        # the fallback when it cannot be found
        app_version = self._search_regex(r'''<script\b[^>]+\bsrc\s*=\s*["|'].*/app\.min\.(\d+)\.js''', iframe_data, 'app version', fatal=False) or '50'

        video_code = url_basename(iframe_url).rsplit('.')[0]

        length = 12
        req = '||'.join((generate_random_string(length), video_code, generate_random_string(length), 'streamsb'))
        ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req))

        video_data = self._download_webpage(ereq, video_id, headers={
            'Referer': iframe_url,
            'watchsb': 'sbstream',
        })
        player_data = self._parse_json(video_data, video_id)
        title = player_data['stream_data']['title']
        formats = self._extract_m3u8_formats(player_data['stream_data']['file'], video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        # sort like the other extractors do before returning formats
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': title,
        }
|
@@ -1,19 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
smuggle_url,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
)
|
||||
|
||||
from .dplay import DPlayIE
|
||||
|
||||
class Tele5IE(InfoExtractor):
|
||||
|
||||
class Tele5IE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TESTS = [{
|
||||
@@ -28,6 +25,7 @@ class Tele5IE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'No longer available: "404 Seite nicht gefunden"',
|
||||
}, {
|
||||
# jwplatform, nexx unavailable
|
||||
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
||||
@@ -42,7 +40,20 @@ class Tele5IE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [JWPlatformIE.ie_key()],
|
||||
'skip': 'No longer available, redirects to Filme page',
|
||||
}, {
|
||||
'url': 'https://tele5.de/mediathek/angel-of-mine/',
|
||||
'info_dict': {
|
||||
'id': '1252360',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220109',
|
||||
'timestamp': 1641762000,
|
||||
'title': 'Angel of Mine',
|
||||
'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
||||
'only_matching': True,
|
||||
@@ -64,45 +75,18 @@ class Tele5IE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
|
||||
|
||||
NEXX_ID_RE = r'\d{6,}'
|
||||
JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
|
||||
|
||||
def nexx_result(nexx_id):
|
||||
return self.url_result(
|
||||
'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
|
||||
ie=NexxIE.ie_key(), video_id=nexx_id)
|
||||
|
||||
nexx_id = jwplatform_id = None
|
||||
|
||||
if video_id:
|
||||
if re.match(NEXX_ID_RE, video_id):
|
||||
return nexx_result(video_id)
|
||||
elif re.match(JWPLATFORM_ID_RE, video_id):
|
||||
jwplatform_id = video_id
|
||||
|
||||
if not nexx_id:
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def extract_id(pattern, name, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
|
||||
r'\s+id\s*=\s*["\']player_(%s)' % pattern,
|
||||
r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
|
||||
default=default)
|
||||
|
||||
nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
|
||||
if nexx_id:
|
||||
return nexx_result(nexx_id)
|
||||
|
||||
if not jwplatform_id:
|
||||
jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
|
||||
|
||||
return self.url_result(
|
||||
smuggle_url(
|
||||
'jwplatform:%s' % jwplatform_id,
|
||||
{'geo_countries': self._GEO_COUNTRIES}),
|
||||
ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
|
||||
player_info = extract_attributes(player_element)
|
||||
asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
|
||||
endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
|
||||
source_type = player_info.get('sourcetype')
|
||||
if source_type:
|
||||
endpoint = '%s-%s' % (source_type, endpoint)
|
||||
try:
|
||||
return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
|
||||
except ExtractorError as e:
|
||||
if getattr(e, 'message', '') == 'Missing deviceId in context':
|
||||
raise ExtractorError('DRM protected', cause=e, expected=True)
|
||||
raise
|
||||
|
@@ -34,7 +34,9 @@ class TelegraafIE(InfoExtractor):
|
||||
article_id = self._match_id(url)
|
||||
|
||||
video_id = self._download_json(
|
||||
'https://www.telegraaf.nl/graphql', article_id, query={
|
||||
'https://app.telegraaf.nl/graphql', article_id,
|
||||
headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
|
||||
query={
|
||||
'query': '''{
|
||||
article(uid: %s) {
|
||||
videos {
|
||||
|
218
youtube_dl/extractor/thisvid.py
Normal file
218
youtube_dl/extractor/thisvid.py
Normal file
@@ -0,0 +1,218 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class ThisVidIE(InfoExtractor):
    # Single video pages (/videos/<slug>) and embeds (/embed/<id>)
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'description': 'md5:372353bb995883d1b65fddf507489acd',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/3533241/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Scrape title and uploader, leaving the media to the Generic IE.

        Returns a url_transparent result wrapping `url`; raises through
        raise_login_required() when the page reports a private video.
        """
        mobj = re.match(self._VALID_URL, url)
        main_id = mobj.group('id')
        is_embed = mobj.group('type') == 'embed'
        webpage = self._download_webpage(url, main_id)

        title = self._html_search_regex(
            r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
            webpage, 'title')

        if is_embed:
            # look for more metadata
            alt_url_re = r'''video_alt_url\s*:\s+'(%s/)',''' % (self._VALID_URL, )
            main_page_url = url_or_none(self._search_regex(
                alt_url_re, webpage, 'video_alt_url', default=None))
            if main_page_url and main_page_url != url:
                # keep the embed page when the main page cannot be fetched
                main_page = self._download_webpage(
                    main_page_url, main_id,
                    note='Redirecting embed to main page', fatal=False)
                webpage = main_page or webpage

        holder_html = get_element_by_class('video-holder', webpage) or ''
        if '>This video is a private video' in holder_html:
            message = clean_html(holder_html) or 'Private video'
            self.raise_login_required(message.split('\n', 1)[0])

        # captured text looks like `<id>/"...>name`, split into id and name
        author = self._html_search_regex(
            r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
            webpage, 'uploader', default='')
        author_parts = re.split(r'''/["'][^>]*>\s*''', author)
        uploader_id = uploader = None
        if len(author_parts) == 2:
            # id must be non-empty, uploader could be ''
            uploader_id = author_parts[0]
            uploader = author_parts[1] or None

        scraped = {
            '_type': 'url_transparent',
            'title': title,
            'age_limit': 18,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
        return merge_dicts(scraped, self.url_result(url, ie='Generic'))
|
||||
|
||||
|
||||
class ThisVidMemberIE(InfoExtractor):
    # Member pages, returned as a playlist of the videos linked from them
    _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://thisvid.com/members/2140501/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Profile',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://thisvid.com/members/2140501/favourite_videos/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Favourite Videos',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://thisvid.com/members/636468/public_videos/',
        'info_dict': {
            'id': '636468',
            'title': 'Happymouth\'s Public Videos',
        },
        'playlist_mincount': 196,
    },
    ]

    def _urls(self, html):
        """Yield every video URL (as matched by ThisVidIE) linked in `html`."""
        link_re = r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (ThisVidIE._VALID_URL, )
        for link in re.finditer(link_re, html):
            yield link.group('url')

    def _real_extract(self, url):
        """Return a playlist of all videos found by following the paging."""
        pl_id = self._match_id(url)
        webpage = self._download_webpage(url, pl_id)

        # og:title first, then <title>; the ' | ThisVid.com' suffix is cut
        raw_title = self._og_search_title(webpage, default=None)
        if not raw_title:
            raw_title = self._html_search_regex(
                r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False)
        title = re.split(
            r'(?i)\s*\|\s*ThisVid\.com\s*$', raw_title or '', 1)[0] or None

        def entries(page_url, html=None):
            for page_num in itertools.count(1):
                if not html:
                    html = self._download_webpage(
                        page_url, pl_id, note='Downloading page %d' % (page_num, ),
                        fatal=False) or ''
                for video_url in self._urls(html):
                    yield video_url
                pager = get_element_by_class('pagination-next', html) or ''
                next_page = pager
                if pager:
                    # member list page
                    next_page = urljoin(url, self._search_regex(
                        r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
                        pager, 'next page link', group='url', default=None))
                # in case a member page should have pagination-next with empty link, not just `else:`
                if next_page is None:
                    # playlist page: bump (or append) the trailing page
                    # number of the current path
                    parsed_url = compat_urlparse.urlparse(page_url)
                    base_path, num = parsed_url.path.rsplit('/', 1)
                    num = int_or_none(num)
                    if num is None:
                        base_path, num = parsed_url.path.rstrip('/'), 1
                    parsed_url = parsed_url._replace(path=base_path + ('/%d' % (num + 1, )))
                    next_page = compat_urlparse.urlunparse(parsed_url)
                    if page_url == next_page:
                        next_page = None
                if not next_page:
                    return
                # force a download of the next page on the next iteration
                page_url, html = next_page, None

        return self.playlist_from_matches(
            entries(url, webpage), playlist_id=pl_id, playlist_title=title, ie='ThisVid')
|
||||
|
||||
|
||||
class ThisVidPlaylistIE(ThisVidMemberIE):
    # Playlist pages (/playlist/<id>/video/<slug>); with --no-playlist only
    # the featured video is extracted
    _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '6615',
            'title': 'Underwear Stuff',
        },
        'playlist_mincount': 200,
    }, {
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '1072387',
            'ext': 'mp4',
            'title': 'Big Italian Booty 28',
            'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
            'uploader_id': '367912',
            'uploader': 'Jcmusclefun',
            'age_limit': 18,
        },
        'params': {
            'noplaylist': True,
        },
    }]

    def _get_video_url(self, pl_url):
        """Map a playlist item URL to the matching /videos/<slug>/ URL."""
        video_id = re.match(self._VALID_URL, pl_url).group('video_id')
        return urljoin(pl_url, '/videos/%s/' % (video_id, ))

    def _urls(self, html):
        """Yield the /videos/ URL for every playlist item linked in `html`."""
        for m in re.finditer(r'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>%s\b)[^>]+>''' % (self._VALID_URL, ), html):
            yield self._get_video_url(m.group('url'))

    def _real_extract(self, url):
        """Return the featured video (--no-playlist) or the whole playlist."""
        pl_id = self._match_id(url)

        if self._downloader.params.get('noplaylist'):
            self.to_screen('Downloading just the featured video because of --no-playlist')
            return self.url_result(self._get_video_url(url), 'ThisVid')

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to download just the featured video' % (pl_id, ))
        result = super(ThisVidPlaylistIE, self)._real_extract(url)

        # rework title returned as `the title - the title`
        # The parent may find no title at all, so read it defensively
        # instead of indexing/measuring a missing or None value.
        title = result.get('title')
        if title:
            t_len = len(title)
            # a doubled title has odd length with '-' exactly in the middle
            if t_len > 5 and t_len % 2 != 0:
                t_len = t_len // 2
                if title[t_len] == '-':
                    title = [t.strip() for t in (title[:t_len], title[t_len + 1:])]
                    if title[0] and title[0] == title[1]:
                        result['title'] = title[0]
        return result
|
@@ -41,8 +41,16 @@ class TV2DKIE(InfoExtractor):
|
||||
'duration': 1347,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'add_ie': ['Kaltura'],
|
||||
}, {
|
||||
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
||||
'info_dict': {
|
||||
'id': '1_7iwll9n0',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20211027',
|
||||
'title': 'Gadekamp #6 - Højhuse i København',
|
||||
'uploader_id': 'tv2lorry',
|
||||
'timestamp': 1635345229,
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
}, {
|
||||
@@ -91,11 +99,14 @@ class TV2DKIE(InfoExtractor):
|
||||
add_entry(partner_id, kaltura_id)
|
||||
if not entries:
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
|
||||
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
|
||||
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
|
||||
partner_id = self._search_regex(
|
||||
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
||||
'partner id')
|
||||
add_entry(partner_id, kaltura_id)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
|
@@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://uktvplay\.(?:uktv\.)?co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||
'info_dict': {
|
||||
|
@@ -95,7 +95,6 @@ class UOLIE(InfoExtractor):
|
||||
if v:
|
||||
query[k] = v
|
||||
f_url = update_url_query(f_url, query)
|
||||
format_id = format_id
|
||||
if format_id == 'HLS':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
f_url, media_id, 'mp4', 'm3u8_native',
|
||||
|
@@ -4,7 +4,11 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
@@ -23,9 +27,10 @@ class URPlayIE(InfoExtractor):
|
||||
'upload_date': '20171214',
|
||||
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
||||
'duration': 2269,
|
||||
'categories': ['Kultur & historia'],
|
||||
'categories': ['Vetenskap & teknik'],
|
||||
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
||||
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
||||
'age_limit': 15,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||
@@ -50,11 +55,19 @@ class URPlayIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vid = int(video_id)
|
||||
accessible_episodes = self._parse_json(self._html_search_regex(
|
||||
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
|
||||
urplayer_data = self._search_regex(
|
||||
r'(?s)\bid\s*=\s*"__NEXT_DATA__"[^>]*>\s*({.+?})\s*</script',
|
||||
webpage, 'urplayer next data', fatal=False) or {}
|
||||
if urplayer_data:
|
||||
urplayer_data = self._parse_json(urplayer_data, video_id, fatal=False)
|
||||
urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
|
||||
if not urplayer_data:
|
||||
raise ExtractorError('Unable to parse __NEXT_DATA__')
|
||||
else:
|
||||
accessible_episodes = self._parse_json(self._html_search_regex(
|
||||
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
|
||||
episode = urplayer_data['title']
|
||||
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
||||
host = self._download_json(
|
||||
@@ -72,6 +85,30 @@ class URPlayIE(InfoExtractor):
|
||||
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
||||
def parse_lang_code(code):
    """Return `code` lower-cased, converted via ISO639Utils when needed, or None (utils candidate)

    A code that ISO639Utils.long2short() recognises is returned as is;
    anything else is passed through ISO639Utils.short2long(). None is
    returned for a None input and whenever no usable code results.
    """
    if code is None:
        return None
    lowered = code.lower()
    if ISO639Utils.long2short(lowered):
        # already a known long code
        return lowered or None
    return ISO639Utils.short2long(lowered) or None
|
||||
|
||||
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
|
||||
if (k in ('sd', 'hd') or not isinstance(v, dict)):
|
||||
continue
|
||||
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
|
||||
if not sttl_url:
|
||||
continue
|
||||
lang = parse_lang_code(lang)
|
||||
if not lang:
|
||||
continue
|
||||
sttl = subtitles.get(lang) or []
|
||||
sttl.append({'ext': k, 'url': sttl_url, })
|
||||
subtitles[lang] = sttl
|
||||
|
||||
image = urplayer_data.get('image') or {}
|
||||
thumbnails = []
|
||||
for k, v in image.items():
|
||||
@@ -104,4 +141,7 @@ class URPlayIE(InfoExtractor):
|
||||
'season': series.get('label'),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||
'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0
|
||||
for a in urplayer_data.get('ageRanges', []))),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_codecs,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -19,6 +20,7 @@ from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_struct_pack,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
@@ -45,10 +47,24 @@ class VideaIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
|
||||
'only_matching': True,
|
||||
'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
|
||||
'info_dict': {
|
||||
'id': 'jAHDWfWSJH5XuFhH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Supercars előzés',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 64,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
|
||||
'only_matching': True,
|
||||
'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
|
||||
'info_dict': {
|
||||
'id': '8YfIAjxwWGwT8HVQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Az őrült kígyász 285 kígyót enged szabadon',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 21,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||
'only_matching': True,
|
||||
@@ -91,13 +107,20 @@ class VideaIE(InfoExtractor):
|
||||
k = S[(S[i] + S[j]) % 256]
|
||||
res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
|
||||
|
||||
return res.decode()
|
||||
return res.decode('utf-8')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
query = {'v': video_id}
|
||||
player_page = self._download_webpage(
|
||||
'https://videa.hu/player', video_id, query=query)
|
||||
video_page = self._download_webpage(url, video_id)
|
||||
|
||||
if 'videa.hu/player' in url:
|
||||
player_url = url
|
||||
player_page = video_page
|
||||
else:
|
||||
player_url = self._search_regex(
|
||||
r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
|
||||
player_url = urljoin(url, player_url)
|
||||
player_page = self._download_webpage(player_url, video_id)
|
||||
|
||||
nonce = self._search_regex(
|
||||
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
|
||||
@@ -107,6 +130,7 @@ class VideaIE(InfoExtractor):
|
||||
for i in range(0, 32):
|
||||
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
||||
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
|
||||
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
|
||||
query['_s'] = random_seed
|
||||
query['_t'] = result[:16]
|
||||
@@ -121,13 +145,13 @@ class VideaIE(InfoExtractor):
|
||||
compat_b64decode(b64_info), key), video_id)
|
||||
|
||||
video = xpath_element(info, './video', 'video')
|
||||
if not video:
|
||||
if video is None:
|
||||
raise ExtractorError(xpath_element(
|
||||
info, './error', fatal=True), expected=True)
|
||||
sources = xpath_element(
|
||||
info, './video_sources', 'sources', fatal=True)
|
||||
hash_values = xpath_element(
|
||||
info, './hash_values', 'hash values', fatal=True)
|
||||
info, './hash_values', 'hash values', fatal=False)
|
||||
|
||||
title = xpath_text(video, './title', fatal=True)
|
||||
|
||||
@@ -136,15 +160,16 @@ class VideaIE(InfoExtractor):
|
||||
source_url = source.text
|
||||
source_name = source.get('name')
|
||||
source_exp = source.get('exp')
|
||||
if not (source_url and source_name and source_exp):
|
||||
if not (source_url and source_name):
|
||||
continue
|
||||
hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
|
||||
if not hash_value:
|
||||
continue
|
||||
source_url = update_url_query(source_url, {
|
||||
'md5': hash_value,
|
||||
'expires': source_exp,
|
||||
})
|
||||
hash_value = (
|
||||
xpath_text(hash_values, 'hash_value_' + source_name)
|
||||
if hash_values is not None else None)
|
||||
if hash_value and source_exp:
|
||||
source_url = update_url_query(source_url, {
|
||||
'md5': hash_value,
|
||||
'expires': source_exp,
|
||||
})
|
||||
f = parse_codecs(source.get('codecs'))
|
||||
f.update({
|
||||
'url': self._proto_relative_url(source_url),
|
||||
|
@@ -1,38 +1,29 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class VikiBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
|
||||
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s'
|
||||
|
||||
_DEVICE_ID = '112395910d'
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '6.0.0'
|
||||
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
||||
_APP_VERSION = '6.11.3'
|
||||
_APP_SECRET = 'd96704b180208dbb2efa30fe44c48bd8690441af9f567ba8fd710a72badc85198f7472'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
_NETRC_MACHINE = 'viki'
|
||||
@@ -45,43 +36,60 @@ class VikiBaseIE(InfoExtractor):
|
||||
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||
}
|
||||
|
||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||
def _stream_headers(self, timestamp, sig):
|
||||
return {
|
||||
'X-Viki-manufacturer': 'vivo',
|
||||
'X-Viki-device-model': 'vivo 1606',
|
||||
'X-Viki-device-os-ver': '6.0.1',
|
||||
'X-Viki-connection-type': 'WIFI',
|
||||
'X-Viki-carrier': '',
|
||||
'X-Viki-as-id': '100005a-1625321982-3932',
|
||||
'timestamp': str(timestamp),
|
||||
'signature': str(sig),
|
||||
'x-viki-app-ver': self._APP_VERSION
|
||||
}
|
||||
|
||||
def _api_query(self, path, version=4, **kwargs):
|
||||
path += '?' if '?' not in path else '&'
|
||||
if not timestamp:
|
||||
timestamp = int(time.time())
|
||||
query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
|
||||
app = self._APP
|
||||
query = '/v{version}/{path}app={app}'.format(**locals())
|
||||
if self._token:
|
||||
query += '&token=%s' % self._token
|
||||
return query + ''.join('&{name}={val}.format(**locals())' for name, val in kwargs.items())
|
||||
|
||||
def _sign_query(self, path):
|
||||
timestamp = int(time.time())
|
||||
query = self._api_query(path, version=5)
|
||||
sig = hmac.new(
|
||||
self._APP_SECRET.encode('ascii'),
|
||||
query.encode('ascii'),
|
||||
hashlib.sha1
|
||||
).hexdigest()
|
||||
url = self._API_URL_TEMPLATE % (query, sig)
|
||||
return sanitized_Request(
|
||||
url, json.dumps(post_data).encode('utf-8')) if post_data else url
|
||||
'{query}&t={timestamp}'.format(**locals()).encode('ascii'),
|
||||
hashlib.sha1).hexdigest()
|
||||
return timestamp, sig, self._API_URL_TEMPLATE % query
|
||||
|
||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
||||
def _call_api(
|
||||
self, path, video_id, note='Downloading JSON metadata', data=None, query=None, fatal=True):
|
||||
if query is None:
|
||||
timestamp, sig, url = self._sign_query(path)
|
||||
else:
|
||||
url = self._API_URL_TEMPLATE % self._api_query(path, version=4)
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, timestamp, post_data), video_id, note,
|
||||
headers={'x-viki-app-ver': self._APP_VERSION})
|
||||
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
if error == 'invalid timestamp':
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, int(resp['current_timestamp']), post_data),
|
||||
video_id, '%s (retry)' % note)
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
self._raise_error(resp['error'])
|
||||
url, video_id, note, fatal=fatal, query=query,
|
||||
data=json.dumps(data).encode('utf-8') if data else None,
|
||||
headers=({'x-viki-app-ver': self._APP_VERSION} if data
|
||||
else self._stream_headers(timestamp, sig) if query is None
|
||||
else None), expected_status=400) or {}
|
||||
|
||||
self._raise_error(resp.get('error'), fatal)
|
||||
return resp
|
||||
|
||||
def _raise_error(self, error):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error),
|
||||
expected=True)
|
||||
def _raise_error(self, error, fatal=True):
|
||||
if error is None:
|
||||
return
|
||||
msg = '%s said: %s' % (self.IE_NAME, error)
|
||||
if fatal:
|
||||
raise ExtractorError(msg, expected=True)
|
||||
else:
|
||||
self.report_warning(msg)
|
||||
|
||||
def _check_errors(self, data):
|
||||
for reason, status in (data.get('blocking') or {}).items():
|
||||
@@ -90,9 +98,10 @@ class VikiBaseIE(InfoExtractor):
|
||||
if reason == 'geo':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
elif reason == 'paywall':
|
||||
if try_get(data, lambda x: x['paywallable']['tvod']):
|
||||
self._raise_error('This video is for rent only or TVOD (Transactional Video On demand)')
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, message), expected=True)
|
||||
self._raise_error(message)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -102,35 +111,39 @@ class VikiBaseIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'login_id': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
login = self._call_api(
|
||||
'sessions.json', None,
|
||||
'Logging in', post_data=login_form)
|
||||
|
||||
self._token = login.get('token')
|
||||
self._token = self._call_api(
|
||||
'sessions.json', None, 'Logging in', fatal=False,
|
||||
data={'username': username, 'password': password}).get('token')
|
||||
if not self._token:
|
||||
self.report_warning('Unable to get session token, login has probably failed')
|
||||
self.report_warning('Login Failed: Unable to get session token')
|
||||
|
||||
@staticmethod
|
||||
def dict_selection(dict_obj, preferred_key, allow_fallback=True):
|
||||
def dict_selection(dict_obj, preferred_key):
|
||||
if preferred_key in dict_obj:
|
||||
return dict_obj.get(preferred_key)
|
||||
|
||||
if not allow_fallback:
|
||||
return
|
||||
|
||||
filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
|
||||
return filtered_dict[0] if filtered_dict else None
|
||||
return dict_obj[preferred_key]
|
||||
return (list(filter(None, dict_obj.values())) or [None])[0]
|
||||
|
||||
|
||||
class VikiIE(VikiBaseIE):
|
||||
IE_NAME = 'viki'
|
||||
_VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'note': 'Free non-DRM video with storyboards in MPD',
|
||||
'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1',
|
||||
'info_dict': {
|
||||
'id': '1175236v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Choosing Spouse by Lottery - Episode 1',
|
||||
'timestamp': 1606463239,
|
||||
'age_limit': 12,
|
||||
'uploader': 'FCC',
|
||||
'upload_date': '20201127',
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
|
||||
'info_dict': {
|
||||
'id': '1023585v',
|
||||
@@ -146,7 +159,7 @@ class VikiIE(VikiBaseIE):
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
'skip': 'Content is only available to Viki Pass Plus subscribers',
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
# clip
|
||||
@@ -178,11 +191,11 @@ class VikiIE(VikiBaseIE):
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
'skip': 'Page not found!',
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '0a53dc252e6e690feccd756861495a8c',
|
||||
'md5': '670440c79f7109ca6564d4c7f24e3e81',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
@@ -193,7 +206,7 @@ class VikiIE(VikiBaseIE):
|
||||
'upload_date': '20100405',
|
||||
'uploader': 'group8',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
'age_limit': 15,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
@@ -224,7 +237,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': '41faaba0de90483fb4848952af7c7d0d',
|
||||
'md5': '78bf49fdaa51f9e7f9150262a9ef9bdf',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
@@ -232,8 +245,8 @@ class VikiIE(VikiBaseIE):
|
||||
'upload_date': '20111122',
|
||||
'timestamp': 1321985454,
|
||||
'description': 'md5:44b1e46619df3a072294645c770cef36',
|
||||
'title': 'Love In Magic',
|
||||
'age_limit': 13,
|
||||
'title': 'Love in Magic',
|
||||
'age_limit': 15,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
@@ -244,45 +257,53 @@ class VikiIE(VikiBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
resp = self._download_json(
|
||||
'https://www.viki.com/api/videos/' + video_id,
|
||||
video_id, 'Downloading video JSON', headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-app-ver': '3.0.0',
|
||||
})
|
||||
video = resp['video']
|
||||
video = self._call_api('videos/{0}.json'.format(video_id), video_id, 'Downloading video JSON', query={})
|
||||
|
||||
self._check_errors(video)
|
||||
|
||||
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
||||
title = try_get(video, lambda x: x['titles']['en'], str)
|
||||
episode_number = int_or_none(video.get('number'))
|
||||
if not title:
|
||||
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = '%s - %s' % (container_title, title)
|
||||
if container_title and title == video_id:
|
||||
title = container_title
|
||||
else:
|
||||
title = '%s - %s' % (container_title, title)
|
||||
|
||||
resp = self._call_api(
|
||||
'playback_streams/%s.json?drms=dt3&device_id=%s' % (video_id, self._DEVICE_ID),
|
||||
video_id, 'Downloading video streams JSON')['main'][0]
|
||||
|
||||
mpd_url = resp['url']
|
||||
# 720p is hidden in another MPD which can be found in the current manifest content
|
||||
mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
|
||||
mpd_url = self._search_regex(
|
||||
r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
|
||||
if 'mpdhd_high' not in mpd_url:
|
||||
# Modify the URL to get 1080p
|
||||
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||
|
||||
thumbnails = [{
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail['url'],
|
||||
} for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')]
|
||||
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail in (video.get('images') or {}).items():
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail.get('url'),
|
||||
})
|
||||
stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id'])
|
||||
subtitles = dict((lang, [{
|
||||
'ext': ext,
|
||||
'url': self._API_URL_TEMPLATE % self._api_query(
|
||||
'videos/{0}/auth_subtitles/{1}.{2}'.format(video_id, lang, ext), stream_id=stream_id)
|
||||
} for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys())
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': subtitles_format,
|
||||
'url': self._prepare_call(
|
||||
'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
|
||||
} for subtitles_format in ('srt', 'vtt')]
|
||||
|
||||
result = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
@@ -296,79 +317,6 @@ class VikiIE(VikiBaseIE):
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
formats = []
|
||||
|
||||
def add_format(format_id, format_dict, protocol='http'):
|
||||
# rtmps URLs does not seem to work
|
||||
if protocol == 'rtmps':
|
||||
return
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
return
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
||||
stream = qs.get('stream', [None])[0]
|
||||
if stream:
|
||||
format_url = base64.b64decode(stream).decode()
|
||||
if format_id in ('m3u8', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native',
|
||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||
# Despite CODECS metadata in m3u8 all video-only formats
|
||||
# are actually video+audio
|
||||
for f in m3u8_formats:
|
||||
if '_drm/index_' in f['url']:
|
||||
continue
|
||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||
f['acodec'] = None
|
||||
formats.append(f)
|
||||
elif format_id in ('mpd', 'dash'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
||||
elif format_url.startswith('rtmp'):
|
||||
mobj = re.search(
|
||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||
format_url)
|
||||
if not mobj:
|
||||
return
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%s' % format_id,
|
||||
'ext': 'flv',
|
||||
'url': mobj.group('url'),
|
||||
'play_path': mobj.group('playpath'),
|
||||
'app': mobj.group('app'),
|
||||
'page_url': url,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': '%s-%s' % (format_id, protocol),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
||||
})
|
||||
|
||||
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||
add_format(format_id, format_dict)
|
||||
if not formats:
|
||||
streams = self._call_api(
|
||||
'videos/%s/streams.json' % video_id, video_id,
|
||||
'Downloading video streams JSON')
|
||||
|
||||
if 'external' in streams:
|
||||
result.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': streams['external']['url'],
|
||||
})
|
||||
return result
|
||||
|
||||
for format_id, stream_dict in streams.items():
|
||||
for protocol, format_dict in stream_dict.items():
|
||||
add_format(format_id, format_dict, protocol)
|
||||
self._sort_formats(formats)
|
||||
|
||||
result['formats'] = formats
|
||||
return result
|
||||
|
||||
|
||||
class VikiChannelIE(VikiBaseIE):
|
||||
IE_NAME = 'viki:channel'
|
||||
@@ -378,9 +326,9 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'info_dict': {
|
||||
'id': '50c',
|
||||
'title': 'Boys Over Flowers',
|
||||
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
||||
'description': 'md5:f08b679c200e1a273c695fe9986f21d7',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
|
||||
'info_dict': {
|
||||
@@ -401,33 +349,38 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PER_PAGE = 25
|
||||
_video_types = ('episodes', 'movies', 'clips', 'trailers')
|
||||
|
||||
def _entries(self, channel_id):
|
||||
params = {
|
||||
'app': self._APP, 'token': self._token, 'only_ids': 'true',
|
||||
'direction': 'asc', 'sort': 'number', 'per_page': 30
|
||||
}
|
||||
video_types = self._video_types
|
||||
for video_type in video_types:
|
||||
if video_type not in self._video_types:
|
||||
self.report_warning('Unknown video_type: ' + video_type)
|
||||
page_num = 0
|
||||
while True:
|
||||
page_num += 1
|
||||
params['page'] = page_num
|
||||
res = self._call_api(
|
||||
'containers/{channel_id}/{video_type}.json'.format(**locals()), channel_id, query=params, fatal=False,
|
||||
note='Downloading %s JSON page %d' % (video_type.title(), page_num))
|
||||
|
||||
for video_id in res.get('response') or []:
|
||||
yield self.url_result('https://www.viki.com/videos/' + video_id, VikiIE.ie_key(), video_id)
|
||||
if not res.get('more'):
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel = self._call_api(
|
||||
'containers/%s.json' % channel_id, channel_id,
|
||||
'Downloading channel JSON')
|
||||
channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON')
|
||||
|
||||
self._check_errors(channel)
|
||||
|
||||
title = self.dict_selection(channel['titles'], 'en')
|
||||
|
||||
description = self.dict_selection(channel['descriptions'], 'en')
|
||||
|
||||
entries = []
|
||||
for video_type in ('episodes', 'clips', 'movies'):
|
||||
for page_num in itertools.count(1):
|
||||
page = self._call_api(
|
||||
'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
|
||||
% (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
|
||||
'Downloading %s JSON page #%d' % (video_type, page_num))
|
||||
for video in page['response']:
|
||||
video_id = video['id']
|
||||
entries.append(self.url_result(
|
||||
'https://www.viki.com/videos/%s' % video_id, 'Viki'))
|
||||
if not page['pagination']['next']:
|
||||
break
|
||||
|
||||
return self.playlist_result(entries, channel_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(channel_id), channel_id,
|
||||
self.dict_selection(channel['titles'], 'en'),
|
||||
self.dict_selection(channel['descriptions'], 'en'))
|
||||
|
@@ -261,27 +261,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
|
||||
# _VALID_URL matches Vimeo URLs
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
www|
|
||||
player
|
||||
)
|
||||
\.
|
||||
)?
|
||||
vimeo(?:pro)?\.com/
|
||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)?
|
||||
(?:
|
||||
(?:
|
||||
play_redirect_hls|
|
||||
moogaloop\.swf)\?clip_id=
|
||||
)?
|
||||
(?:videos?/)?
|
||||
(?P<id>[0-9]+)
|
||||
(?:/(?P<unlisted_hash>[\da-f]{10}))?
|
||||
/?(?:[?&].*)?(?:[#].*)?$
|
||||
'''
|
||||
https?://
|
||||
(?:
|
||||
(?:
|
||||
www|
|
||||
player
|
||||
)
|
||||
\.
|
||||
)?
|
||||
vimeo(?:pro)?\.com/
|
||||
(?:
|
||||
(?P<u>user)|
|
||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)??
|
||||
(?P<q>
|
||||
(?:
|
||||
play_redirect_hls|
|
||||
moogaloop\.swf)\?clip_id=
|
||||
)?
|
||||
(?:videos?/)?
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?(u)
|
||||
/(?!videos|likes)[^/?#]+/?|
|
||||
(?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
|
||||
)
|
||||
(?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
|
||||
'''
|
||||
IE_NAME = 'vimeo'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -517,15 +523,34 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/7809605',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||
'only_matching': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
# similar, but all numeric: ID must be 581039021, not 9603038895
|
||||
# issue #29690
|
||||
'url': 'https://vimeo.com/581039021/9603038895',
|
||||
'info_dict': {
|
||||
'id': '581039021',
|
||||
# these have to be provided but we don't care
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1627621014,
|
||||
'title': 're:.+',
|
||||
'uploader_id': 're:.+',
|
||||
'uploader': 're:.+',
|
||||
'upload_date': r're:\d+',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# user playlist alias -> https://vimeo.com/258705797
|
||||
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
||||
'only_matching': True,
|
||||
},
|
||||
# https://gettingthingsdone.com/workflowmap/
|
||||
# vimeo embed with check-password page protected by Referer header
|
||||
]
|
||||
@@ -649,7 +674,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
|
||||
if '//player.vimeo.com/video/' in url:
|
||||
config = self._parse_json(self._search_regex(
|
||||
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
|
||||
r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(
|
||||
redirect_url, video_id, headers)
|
||||
|
@@ -64,6 +64,18 @@ class VVVVIDIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# video_type == 'video/dash'
|
||||
'url': 'https://www.vvvvid.it/show/683/made-in-abyss/1542/693786/nanachi',
|
||||
'info_dict': {
|
||||
'id': '693786',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nanachi',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
||||
'only_matching': True
|
||||
@@ -205,6 +217,9 @@ class VVVVIDIE(InfoExtractor):
|
||||
})
|
||||
is_youtube = True
|
||||
break
|
||||
elif video_type == 'video/dash':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
||||
|
@@ -57,7 +57,7 @@ class WatIE(InfoExtractor):
|
||||
# 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)
|
||||
video_data = self._download_json(
|
||||
'https://mediainfo.tf1.fr/mediainfocombo/' + video_id,
|
||||
video_id, query={'context': 'MYTF1'})
|
||||
video_id, query={'context': 'MYTF1', 'pver': '4001000'})
|
||||
video_info = video_data['media']
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
|
@@ -10,6 +10,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
@@ -22,9 +23,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class WDRIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
|
||||
__API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'
|
||||
_VALID_URL = (r'(?:https?:' + __API_URL_TPL) % (r'\d+', r'(?=\d+\.js)|wdr:)(?P<id>\d{6,})')
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1557833',
|
||||
@@ -32,11 +34,20 @@ class WDRIE(InfoExtractor):
|
||||
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
||||
'upload_date': '20180112',
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _asset_url(self, wdr_id):
|
||||
id_len = max(len(wdr_id), 5)
|
||||
return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if url.startswith('wdr:'):
|
||||
video_id = url[4:]
|
||||
url = self._asset_url(video_id)
|
||||
|
||||
metadata = self._download_json(
|
||||
url, video_id, transform_source=strip_jsonp)
|
||||
|
||||
@@ -115,10 +126,10 @@ class WDRIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class WDRPageIE(InfoExtractor):
|
||||
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
||||
class WDRPageIE(WDRIE):
|
||||
_MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
|
||||
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -159,11 +170,11 @@ class WDRPageIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1406149',
|
||||
'id': 'mdb-2296252',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'alt_title': 'WDR Fernsehen Live',
|
||||
'upload_date': '20150101',
|
||||
'upload_date': '20201112',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
@@ -172,7 +183,7 @@ class WDRPageIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||
'playlist_mincount': 7,
|
||||
'playlist_mincount': 6,
|
||||
'info_dict': {
|
||||
'id': 'aktuelle-stunde-120',
|
||||
},
|
||||
@@ -180,10 +191,10 @@ class WDRPageIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||
'info_dict': {
|
||||
'id': 'mdb-1552552',
|
||||
'id': 'mdb-2627637',
|
||||
'ext': 'mp4',
|
||||
'upload_date': 're:^[0-9]{8}$',
|
||||
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
||||
'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
|
||||
},
|
||||
'skip': 'The id changes from week to week because of the new episode'
|
||||
},
|
||||
@@ -196,6 +207,7 @@ class WDRPageIE(InfoExtractor):
|
||||
'upload_date': '20130919',
|
||||
'title': 'Sachgeschichte - Achterbahn ',
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
||||
@@ -221,6 +233,7 @@ class WDRPageIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
||||
@@ -234,7 +247,7 @@ class WDRPageIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
@@ -260,6 +273,14 @@ class WDRPageIE(InfoExtractor):
|
||||
jsonp_url = try_get(
|
||||
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
||||
if jsonp_url:
|
||||
# metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps
|
||||
clip_id = media_link_obj['mediaObj'].get('ref')
|
||||
if jsonp_url.endswith('.assetjsonp'):
|
||||
asset = self._download_json(
|
||||
jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
|
||||
clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
|
||||
if clip_id:
|
||||
jsonp_url = self._asset_url(clip_id[4:])
|
||||
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
||||
|
||||
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
||||
@@ -279,16 +300,14 @@ class WDRPageIE(InfoExtractor):
|
||||
class WDRElefantIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
|
||||
'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe',
|
||||
# adaptive stream: unstable file MD5
|
||||
'info_dict': {
|
||||
'title': 'Folge Oster-Spezial 2015',
|
||||
'id': 'mdb-1088195',
|
||||
'title': 'Wippe',
|
||||
'id': 'mdb-1198320',
|
||||
'ext': 'mp4',
|
||||
'age_limit': None,
|
||||
'upload_date': '20150406'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'upload_date': '20071003'
|
||||
},
|
||||
}
|
||||
|
||||
@@ -323,6 +342,7 @@ class WDRMobileIE(InfoExtractor):
|
||||
/[0-9]+/[0-9]+/
|
||||
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
||||
IE_NAME = 'wdr:mobile'
|
||||
_WORKING = False # no such domain
|
||||
_TEST = {
|
||||
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
||||
'info_dict': {
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
@@ -23,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
|
||||
_DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:.+?\.)?%s/
|
||||
@@ -34,7 +35,7 @@ class XHamsterIE(InfoExtractor):
|
||||
''' % _DOMAINS
|
||||
_TESTS = [{
|
||||
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'md5': '98b4687efb1ffd331c4197854dc09e8f',
|
||||
'md5': '34e1ab926db5dc2750fed9e1f34304bb',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
|
||||
@@ -43,6 +44,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'timestamp': 1350194821,
|
||||
'upload_date': '20121014',
|
||||
'uploader': 'Ruseful2011',
|
||||
'uploader_id': 'ruseful2011',
|
||||
'duration': 893,
|
||||
'age_limit': 18,
|
||||
},
|
||||
@@ -72,6 +74,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'timestamp': 1454948101,
|
||||
'upload_date': '20160208',
|
||||
'uploader': 'parejafree',
|
||||
'uploader_id': 'parejafree',
|
||||
'duration': 72,
|
||||
'age_limit': 18,
|
||||
},
|
||||
@@ -117,6 +120,12 @@ class XHamsterIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -245,6 +254,7 @@ class XHamsterIE(InfoExtractor):
|
||||
else:
|
||||
categories = None
|
||||
|
||||
uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -253,6 +263,8 @@ class XHamsterIE(InfoExtractor):
|
||||
'timestamp': int_or_none(video.get('created')),
|
||||
'uploader': try_get(
|
||||
video, lambda x: x['author']['name'], compat_str),
|
||||
'uploader_url': uploader_url,
|
||||
'uploader_id': uploader_url.split('/')[-1] if uploader_url else None,
|
||||
'thumbnail': video.get('thumbURL'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
@@ -261,7 +273,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'dislike_count': int_or_none(try_get(
|
||||
video, lambda x: x['rating']['dislikes'], int)),
|
||||
'comment_count': int_or_none(video.get('views')),
|
||||
'age_limit': age_limit,
|
||||
'age_limit': age_limit if age_limit is not None else 18,
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -352,6 +364,7 @@ class XHamsterIE(InfoExtractor):
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader.lower() if uploader else None,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
@@ -420,6 +433,12 @@ class XHamsterUserIE(InfoExtractor):
|
||||
'id': 'firatkaan',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
'url': 'https://xhday.com/users/mobhunter',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xhvid.com/users/pelushe21',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _entries(self, user_id):
|
||||
|
@@ -82,7 +82,7 @@ class XVideosIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.xvideos.com/video%s/' % video_id, video_id)
|
||||
'https://www.xvideos.com/video%s/0' % video_id, video_id)
|
||||
|
||||
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
||||
if mobj:
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -137,9 +138,10 @@ class YouPornIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
[r'UPLOADED:\s*<span>([^<]+)',
|
||||
(r'UPLOADED:\s*<span>([^<]+)',
|
||||
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||
r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
|
||||
r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -7,13 +7,15 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
NO_DEFAULT,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
qualities,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
@@ -56,28 +58,39 @@ class ZDFBaseIE(InfoExtractor):
|
||||
format_urls.add(format_url)
|
||||
mime_type = meta.get('mimeType')
|
||||
ext = determine_ext(format_url)
|
||||
|
||||
join_nonempty = lambda s, l: s.join(filter(None, l))
|
||||
meta_map = lambda t: map(lambda x: str_or_none(meta.get(x)), t)
|
||||
|
||||
if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
new_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||
entry_protocol='m3u8_native', fatal=False))
|
||||
entry_protocol='m3u8_native', fatal=False)
|
||||
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
|
||||
new_formats = self._extract_f4m_formats(
|
||||
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
|
||||
else:
|
||||
f = parse_codecs(meta.get('mimeCodec'))
|
||||
if not f:
|
||||
data = meta.get('type', '').split('_')
|
||||
if try_get(data, lambda x: x[2]) == ext:
|
||||
f = dict(zip(('vcodec', 'acodec'), data[1]))
|
||||
|
||||
format_id = ['http']
|
||||
for p in (meta.get('type'), meta.get('quality')):
|
||||
if p and isinstance(p, compat_str):
|
||||
format_id.append(p)
|
||||
format_id.extend(join_nonempty('-', meta_map(('type', 'quality'))))
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'format_id': '-'.join(format_id),
|
||||
'format_note': meta.get('quality'),
|
||||
'language': meta.get('language'),
|
||||
'quality': qualities(self._QUALITIES)(meta.get('quality')),
|
||||
'preference': -10,
|
||||
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
|
||||
})
|
||||
formats.append(f)
|
||||
new_formats = [f]
|
||||
|
||||
formats.extend(merge_dicts(f, {
|
||||
'format_note': join_nonempty(',', meta_map(('quality', 'class'))),
|
||||
'language': meta.get('language'),
|
||||
'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
|
||||
'quality': qualities(self._QUALITIES)(meta.get('quality')),
|
||||
}) for f in new_formats)
|
||||
|
||||
def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
|
||||
ptmd = self._call_api(
|
||||
@@ -106,6 +119,7 @@ class ZDFBaseIE(InfoExtractor):
|
||||
'type': f.get('type'),
|
||||
'mimeType': f.get('mimeType'),
|
||||
'quality': quality.get('quality'),
|
||||
'class': track.get('class'),
|
||||
'language': track.get('language'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
@@ -145,6 +159,7 @@ class ZDFIE(ZDFBaseIE):
|
||||
'timestamp': 1613948400,
|
||||
'upload_date': '20210221',
|
||||
},
|
||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||
}, {
|
||||
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
||||
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
||||
@@ -158,6 +173,7 @@ class ZDFIE(ZDFBaseIE):
|
||||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
},
|
||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
|
||||
'info_dict': {
|
||||
@@ -168,6 +184,20 @@ class ZDFIE(ZDFBaseIE):
|
||||
'duration': 2615,
|
||||
'timestamp': 1465021200,
|
||||
'upload_date': '20160604',
|
||||
'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
|
||||
'md5': '1b93bdec7d02fc0b703c5e7687461628',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'video_funk_1770473',
|
||||
'duration': 1278,
|
||||
'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
|
||||
'title': 'Alles ist verzaubert',
|
||||
'timestamp': 1635520560,
|
||||
'upload_date': '20211029',
|
||||
'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
|
||||
},
|
||||
}, {
|
||||
# Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
|
||||
@@ -190,6 +220,30 @@ class ZDFIE(ZDFBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
|
||||
'info_dict': {
|
||||
'id': 'video_artede_083871-001-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tödliche Flucht (1/6)',
|
||||
'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
|
||||
'duration': 3193.0,
|
||||
'timestamp': 1641355200,
|
||||
'upload_date': '20220105',
|
||||
},
|
||||
'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
|
||||
'info_dict': {
|
||||
'id': '191205_1800_sendung_sok8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Das Geld anderer Leute',
|
||||
'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
|
||||
'duration': 2581.0,
|
||||
'timestamp': 1654790700,
|
||||
'upload_date': '20220609',
|
||||
'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_entry(self, url, player, content, video_id):
|
||||
@@ -197,12 +251,18 @@ class ZDFIE(ZDFBaseIE):
|
||||
|
||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||
|
||||
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
|
||||
def get_ptmd_path(d):
|
||||
return (
|
||||
d.get('http://zdf.de/rels/streams/ptmd')
|
||||
or d.get('http://zdf.de/rels/streams/ptmd-template',
|
||||
'').replace('{playerId}', 'ngplayer_2_4'))
|
||||
|
||||
ptmd_path = get_ptmd_path(try_get(t, lambda x: x['streams']['default'], dict) or {})
|
||||
if not ptmd_path:
|
||||
ptmd_path = get_ptmd_path(t)
|
||||
|
||||
if not ptmd_path:
|
||||
ptmd_path = t[
|
||||
'http://zdf.de/rels/streams/ptmd-template'].replace(
|
||||
'{playerId}', 'ngplayer_2_4')
|
||||
raise ExtractorError('Could not extract ptmd_path')
|
||||
|
||||
info = self._extract_ptmd(
|
||||
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
||||
@@ -245,15 +305,16 @@ class ZDFIE(ZDFBaseIE):
|
||||
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
document = video['document']
|
||||
|
||||
title = document['titel']
|
||||
content_id = document['basename']
|
||||
|
||||
formats = []
|
||||
format_urls = set()
|
||||
for f in document['formitaeten']:
|
||||
self._extract_format(content_id, formats, format_urls, f)
|
||||
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||
document = formitaeten and video['document']
|
||||
if formitaeten:
|
||||
title = document['titel']
|
||||
content_id = document['basename']
|
||||
|
||||
format_urls = set()
|
||||
for f in formitaeten or []:
|
||||
self._extract_format(content_id, formats, format_urls, f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
@@ -300,9 +361,9 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
|
||||
'info_dict': {
|
||||
'id': 'das-aktuelle-sportstudio',
|
||||
'title': 'das aktuelle sportstudio | ZDF',
|
||||
'title': 'das aktuelle sportstudio',
|
||||
},
|
||||
'playlist_mincount': 23,
|
||||
'playlist_mincount': 18,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
||||
'info_dict': {
|
||||
@@ -310,6 +371,14 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
'title': 'planet e.',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
||||
'info_dict': {
|
||||
'id': 'aktenzeichen-xy-ungeloest',
|
||||
'title': 'Aktenzeichen XY... ungelöst',
|
||||
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
||||
'only_matching': True,
|
||||
@@ -319,60 +388,36 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
def suitable(cls, url):
|
||||
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
|
||||
|
||||
def _og_search_title(self, webpage, fatal=False):
|
||||
title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
|
||||
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, channel_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(item_url, ie=ZDFIE.ie_key())
|
||||
for item_url in orderedSet(re.findall(
|
||||
r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
|
||||
matches = re.finditer(
|
||||
r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
|
||||
webpage)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, channel_id, self._og_search_title(webpage, fatal=False))
|
||||
if self._downloader.params.get('noplaylist', False):
|
||||
entry = next(
|
||||
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
|
||||
None)
|
||||
self.to_screen('Downloading just the main video because of --no-playlist')
|
||||
if entry:
|
||||
return entry
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
|
||||
|
||||
r"""
|
||||
player = self._extract_player(webpage, channel_id)
|
||||
def check_video(m):
|
||||
v_ref = self._search_regex(
|
||||
r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
|
||||
webpage, 'check id', default='')
|
||||
v_ref = extract_attributes(v_ref)
|
||||
return v_ref.get('data-target-video-type') != 'novideo'
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
|
||||
'channel id', group='id')
|
||||
|
||||
channel = self._call_api(
|
||||
'https://api.zdf.de/content/documents/%s.json' % channel_id,
|
||||
player, url, channel_id)
|
||||
|
||||
items = []
|
||||
for module in channel['module']:
|
||||
for teaser in try_get(module, lambda x: x['teaser'], list) or []:
|
||||
t = try_get(
|
||||
teaser, lambda x: x['http://zdf.de/rels/target'], dict)
|
||||
if not t:
|
||||
continue
|
||||
items.extend(try_get(
|
||||
t,
|
||||
lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
|
||||
list) or [])
|
||||
items.extend(try_get(
|
||||
module,
|
||||
lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
|
||||
list) or [])
|
||||
|
||||
entries = []
|
||||
entry_urls = set()
|
||||
for item in items:
|
||||
t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
|
||||
if not t:
|
||||
continue
|
||||
sharing_url = t.get('http://zdf.de/rels/sharing-url')
|
||||
if not sharing_url or not isinstance(sharing_url, compat_str):
|
||||
continue
|
||||
if sharing_url in entry_urls:
|
||||
continue
|
||||
entry_urls.add(sharing_url)
|
||||
entries.append(self.url_result(
|
||||
sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
|
||||
|
||||
return self.playlist_result(entries, channel_id, channel.get('title'))
|
||||
"""
|
||||
return self.playlist_from_matches(
|
||||
(m.group('url') for m in matches if check_video(m)),
|
||||
channel_id, self._og_search_title(webpage, fatal=False))
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -270,11 +270,11 @@ def parseOpts(overrideArguments=None):
|
||||
selection.add_option(
|
||||
'--match-title',
|
||||
dest='matchtitle', metavar='REGEX',
|
||||
help='Download only matching titles (regex or caseless sub-string)')
|
||||
help='Download only matching titles (case-insensitive regex or alphanumeric sub-string)')
|
||||
selection.add_option(
|
||||
'--reject-title',
|
||||
dest='rejecttitle', metavar='REGEX',
|
||||
help='Skip download for matching titles (regex or caseless sub-string)')
|
||||
help='Skip download for matching titles (case-insensitive regex or alphanumeric sub-string)')
|
||||
selection.add_option(
|
||||
'--max-downloads',
|
||||
dest='max_downloads', metavar='NUMBER', type=int, default=None,
|
||||
@@ -801,7 +801,7 @@ def parseOpts(overrideArguments=None):
|
||||
postproc.add_option(
|
||||
'--postprocessor-args',
|
||||
dest='postprocessor_args', metavar='ARGS',
|
||||
help='Give these arguments to the postprocessor')
|
||||
help='Give these arguments to the postprocessor (if postprocessing is required)')
|
||||
postproc.add_option(
|
||||
'-k', '--keep-video',
|
||||
action='store_true', dest='keepvideo', default=False,
|
||||
|
@@ -13,8 +13,9 @@ from ..utils import (
|
||||
encodeFilename,
|
||||
PostProcessingError,
|
||||
prepend_extension,
|
||||
process_communicate_or_kill,
|
||||
replace_extension,
|
||||
shell_quote
|
||||
shell_quote,
|
||||
)
|
||||
|
||||
|
||||
@@ -109,7 +110,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
|
||||
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout, stderr = p.communicate()
|
||||
stdout, stderr = process_communicate_or_kill(p)
|
||||
|
||||
if p.returncode != 0:
|
||||
msg = stderr.decode('utf-8', 'replace').strip()
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
is_outdated_version,
|
||||
PostProcessingError,
|
||||
prepend_extension,
|
||||
process_communicate_or_kill,
|
||||
shell_quote,
|
||||
subtitles_filename,
|
||||
dfxp2srt,
|
||||
@@ -180,7 +181,7 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
handle = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
stdout_data, stderr_data = handle.communicate()
|
||||
stdout_data, stderr_data = process_communicate_or_kill(handle)
|
||||
expected_ret = 0 if self.probe_available else 1
|
||||
if handle.wait() != expected_ret:
|
||||
return None
|
||||
@@ -228,7 +229,7 @@ class FFmpegPostProcessor(PostProcessor):
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
stdout, stderr = p.communicate()
|
||||
stdout, stderr = process_communicate_or_kill(p)
|
||||
if p.returncode != 0:
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
msgs = stderr.strip().split('\n')
|
||||
|
@@ -40,6 +40,8 @@ class MetadataFromTitlePP(PostProcessor):
|
||||
% self._titleformat)
|
||||
return [], info
|
||||
for attribute, value in match.groupdict().items():
|
||||
if value is None:
|
||||
continue
|
||||
info[attribute] = value
|
||||
self._downloader.to_screen(
|
||||
'[fromtitle] parsed %s: %s'
|
||||
|
@@ -33,6 +33,7 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import unicodedata
|
||||
import xml.etree.ElementTree
|
||||
import zlib
|
||||
|
||||
@@ -41,7 +42,9 @@ from .compat import (
|
||||
compat_HTMLParser,
|
||||
compat_HTTPError,
|
||||
compat_basestring,
|
||||
compat_casefold,
|
||||
compat_chr,
|
||||
compat_collections_abc,
|
||||
compat_cookiejar,
|
||||
compat_ctypes_WINFUNCTYPE,
|
||||
compat_etree_fromstring,
|
||||
@@ -52,18 +55,18 @@ from .compat import (
|
||||
compat_integer_types,
|
||||
compat_kwargs,
|
||||
compat_os_name,
|
||||
compat_parse_qs,
|
||||
compat_re_Match,
|
||||
compat_shlex_quote,
|
||||
compat_str,
|
||||
compat_struct_pack,
|
||||
compat_struct_unpack,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse,
|
||||
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
compat_xpath,
|
||||
)
|
||||
|
||||
@@ -78,12 +81,12 @@ def register_socks_protocols():
|
||||
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
|
||||
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
|
||||
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
|
||||
if scheme not in compat_urlparse.uses_netloc:
|
||||
compat_urlparse.uses_netloc.append(scheme)
|
||||
if scheme not in compat_urllib_parse.uses_netloc:
|
||||
compat_urllib_parse.uses_netloc.append(scheme)
|
||||
|
||||
|
||||
# This is not clearly defined otherwise
|
||||
compiled_regex_type = type(re.compile(''))
|
||||
# Unfavoured alias
|
||||
compiled_regex_type = compat_re_Match
|
||||
|
||||
|
||||
def random_user_agent():
|
||||
@@ -1684,6 +1687,7 @@ USER_AGENTS = {
|
||||
|
||||
|
||||
NO_DEFAULT = object()
|
||||
IDENTITY = lambda x: x
|
||||
|
||||
ENGLISH_MONTH_NAMES = [
|
||||
'January', 'February', 'March', 'April', 'May', 'June',
|
||||
@@ -1696,6 +1700,17 @@ MONTH_NAMES = {
|
||||
'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
|
||||
}
|
||||
|
||||
# Timezone names for RFC2822 obs-zone
|
||||
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
|
||||
TIMEZONE_NAMES = {
|
||||
'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
|
||||
'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
|
||||
'EST': -5, 'EDT': -4, # Eastern
|
||||
'CST': -6, 'CDT': -5, # Central
|
||||
'MST': -7, 'MDT': -6, # Mountain
|
||||
'PST': -8, 'PDT': -7 # Pacific
|
||||
}
|
||||
|
||||
KNOWN_EXTENSIONS = (
|
||||
'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
|
||||
'flv', 'f4v', 'f4a', 'f4b',
|
||||
@@ -1735,12 +1750,17 @@ DATE_FORMATS = (
|
||||
'%b %dth %Y %I:%M',
|
||||
'%Y %m %d',
|
||||
'%Y-%m-%d',
|
||||
'%Y.%m.%d.',
|
||||
'%Y/%m/%d',
|
||||
'%Y/%m/%d %H:%M',
|
||||
'%Y/%m/%d %H:%M:%S',
|
||||
'%Y%m%d%H%M',
|
||||
'%Y%m%d%H%M%S',
|
||||
'%Y%m%d',
|
||||
'%Y-%m-%d %H:%M',
|
||||
'%Y-%m-%d %H:%M:%S',
|
||||
'%Y-%m-%d %H:%M:%S.%f',
|
||||
'%Y-%m-%d %H:%M:%S:%f',
|
||||
'%d.%m.%Y %H:%M',
|
||||
'%d.%m.%Y %H.%M',
|
||||
'%Y-%m-%dT%H:%M:%SZ',
|
||||
@@ -1753,6 +1773,7 @@ DATE_FORMATS = (
|
||||
'%b %d %Y at %H:%M:%S',
|
||||
'%B %d %Y at %H:%M',
|
||||
'%B %d %Y at %H:%M:%S',
|
||||
'%H:%M %d-%b-%Y',
|
||||
)
|
||||
|
||||
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
|
||||
@@ -1763,6 +1784,7 @@ DATE_FORMATS_DAY_FIRST.extend([
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
'%d-%m-%Y %H:%M',
|
||||
])
|
||||
|
||||
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
|
||||
@@ -2100,6 +2122,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
|
||||
return '_'
|
||||
return char
|
||||
|
||||
# Replace look-alike Unicode glyphs
|
||||
if restricted and not is_id:
|
||||
s = unicodedata.normalize('NFKC', s)
|
||||
# Handle timestamps
|
||||
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
|
||||
result = ''.join(map(replace_insane, s))
|
||||
@@ -2151,7 +2176,7 @@ def sanitize_url(url):
|
||||
for mistake, fixup in COMMON_TYPOS:
|
||||
if re.match(mistake, url):
|
||||
return re.sub(mistake, fixup, url)
|
||||
return url
|
||||
return escape_url(url)
|
||||
|
||||
|
||||
def sanitized_Request(url, *args, **kwargs):
|
||||
@@ -2212,6 +2237,15 @@ def unescapeHTML(s):
|
||||
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
|
||||
|
||||
|
||||
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(*args, **kwargs) and return its result

    If communicate() is interrupted by any exception, the process is
    killed and waited for before the exception is re-raised.
    """
    try:
        output = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return output
|
||||
|
||||
|
||||
def get_subprocess_encoding():
|
||||
if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
|
||||
# For subprocess calls, encode with locale encoding
|
||||
@@ -2292,12 +2326,30 @@ def formatSeconds(secs):
|
||||
|
||||
|
||||
def make_HTTPS_handler(params, **kwargs):
|
||||
|
||||
# https://www.rfc-editor.org/info/rfc7301
|
||||
ALPN_PROTOCOLS = ['http/1.1']
|
||||
|
||||
def set_alpn_protocols(ctx):
|
||||
# From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
|
||||
# Thanks @coletdjnz
|
||||
# Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
|
||||
# https://github.com/python/cpython/issues/85140
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/3878
|
||||
try:
|
||||
ctx.set_alpn_protocols(ALPN_PROTOCOLS)
|
||||
except (AttributeError, NotImplementedError):
|
||||
# Python < 2.7.10, not ssl.HAS_ALPN
|
||||
pass
|
||||
|
||||
opts_no_check_certificate = params.get('nocheckcertificate', False)
|
||||
if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
|
||||
context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
|
||||
set_alpn_protocols(context)
|
||||
if opts_no_check_certificate:
|
||||
context.check_hostname = False
|
||||
context.verify_mode = ssl.CERT_NONE
|
||||
|
||||
try:
|
||||
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
||||
except TypeError:
|
||||
@@ -2313,6 +2365,7 @@ def make_HTTPS_handler(params, **kwargs):
|
||||
if opts_no_check_certificate
|
||||
else ssl.CERT_REQUIRED)
|
||||
context.set_default_verify_paths()
|
||||
set_alpn_protocols(context)
|
||||
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
|
||||
|
||||
|
||||
@@ -2673,7 +2726,7 @@ def make_socks_conn_class(base_class, socks_proxy):
|
||||
assert issubclass(base_class, (
|
||||
compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
|
||||
|
||||
url_components = compat_urlparse.urlparse(socks_proxy)
|
||||
url_components = compat_urllib_parse.urlparse(socks_proxy)
|
||||
if url_components.scheme.lower() == 'socks5':
|
||||
socks_type = ProxyType.SOCKS5
|
||||
elif url_components.scheme.lower() in ('socks', 'socks4'):
|
||||
@@ -2938,10 +2991,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
||||
|
||||
def extract_timezone(date_str):
|
||||
m = re.search(
|
||||
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||
date_str)
|
||||
r'''(?x)
|
||||
^.{8,}? # >=8 char non-TZ prefix, if present
|
||||
(?P<tz>Z| # just the UTC Z, or
|
||||
(?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
|
||||
(?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
|
||||
[ ]? # optional space
|
||||
(?P<sign>\+|-) # +/-
|
||||
(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
|
||||
$)
|
||||
''', date_str)
|
||||
if not m:
|
||||
timezone = datetime.timedelta()
|
||||
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
|
||||
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
|
||||
if timezone is not None:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
timezone = datetime.timedelta(hours=timezone or 0)
|
||||
else:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
if not m.group('sign'):
|
||||
@@ -3009,7 +3074,8 @@ def unified_timestamp(date_str, day_first=True):
|
||||
if date_str is None:
|
||||
return None
|
||||
|
||||
date_str = re.sub(r'[,|]', '', date_str)
|
||||
date_str = re.sub(r'\s+', ' ', re.sub(
|
||||
r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
|
||||
|
||||
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
||||
timezone, date_str = extract_timezone(date_str)
|
||||
@@ -3035,7 +3101,7 @@ def unified_timestamp(date_str, day_first=True):
|
||||
pass
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
return calendar.timegm(timetuple) + pm_delta * 3600
|
||||
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
|
||||
|
||||
|
||||
def determine_ext(url, default_ext='unknown_video'):
|
||||
@@ -3608,7 +3674,7 @@ def remove_quotes(s):
|
||||
|
||||
|
||||
def url_basename(url):
    """Return the last segment of the path of `url`

    Leading and trailing slashes of the path are ignored; the result is
    '' when the path has no segment.
    """
    path = compat_urllib_parse.urlparse(url).path
    return path.strip('/').split('/')[-1]
|
||||
|
||||
|
||||
@@ -3628,7 +3694,7 @@ def urljoin(base, path):
|
||||
if not isinstance(base, compat_str) or not re.match(
|
||||
r'^(?:https?:)?//', base):
|
||||
return None
|
||||
return compat_urlparse.urljoin(base, path)
|
||||
return compat_urllib_parse.urljoin(base, path)
|
||||
|
||||
|
||||
class HEADRequest(compat_urllib_request.Request):
|
||||
@@ -3645,13 +3711,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
|
||||
if get_attr:
|
||||
if v is not None:
|
||||
v = getattr(v, get_attr, None)
|
||||
if v == '':
|
||||
v = None
|
||||
if v is None:
|
||||
if v in (None, ''):
|
||||
return default
|
||||
try:
|
||||
return int(v) * invscale // scale
|
||||
except (ValueError, TypeError):
|
||||
except (ValueError, TypeError, OverflowError):
|
||||
return default
|
||||
|
||||
|
||||
@@ -3769,7 +3833,8 @@ def check_executable(exe, args=[]):
|
||||
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
|
||||
args can be a list of arguments for a short output (like -version) """
|
||||
try:
|
||||
subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
|
||||
process_communicate_or_kill(subprocess.Popen(
|
||||
[exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
|
||||
except OSError:
|
||||
return False
|
||||
return exe
|
||||
@@ -3783,10 +3848,10 @@ def get_exe_version(exe, args=['--version'],
|
||||
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
|
||||
# SIGTTOU if youtube-dl is run in the background.
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
|
||||
out, _ = subprocess.Popen(
|
||||
out, _ = process_communicate_or_kill(subprocess.Popen(
|
||||
[encodeArgument(exe)] + args,
|
||||
stdin=subprocess.PIPE,
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
|
||||
except OSError:
|
||||
return False
|
||||
if isinstance(out, bytes): # Python 2.x
|
||||
@@ -3805,6 +3870,105 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
|
||||
return unrecognized
|
||||
|
||||
|
||||
class LazyList(compat_collections_abc.Sequence):
    """Lazy immutable list from an iterable

    Items are pulled from the iterable only when they are needed and are
    kept in a cache so that they can be read again.
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        """Subclass of the builtin IndexError raised for out-of-range indices"""

        def __init__(self, cause=None):
            if cause:
                # reproduce `raise from`
                self.__cause__ = cause
            # A bare `IndexError` here would be the builtin (class scope is not
            # visible from a method), so name this class explicitly
            super(LazyList.IndexError, self).__init__()

    def __init__(self, iterable, **kwargs):
        """
        @param iterable     the source of the items
        Keyword arguments:
        @param reverse      if true, present the items in reverse order
        @param _cache       list of already evaluated items to share (internal)
        """
        # kwarg-only
        reverse = kwargs.get('reverse', False)
        _cache = kwargs.get('_cache')

        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            for item in self.exhaust():
                yield item
            return
        # First replay what has been evaluated, then continue evaluating
        for item in self._cache:
            yield item
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        """Evaluate all remaining items and return the cache (in source order)"""
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        """Map an index counted from one end to the same item counted from the other"""
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e)
        # Evaluate only as many further items as the index requires
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e)

    def __bool__(self):
        # Evaluates at most one item, unless reversed
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    # Python 2 spells the truth test hook `__nonzero__`; without it bool()
    # would fall back to __len__() and evaluate the entire iterable
    __nonzero__ = __bool__

    def __len__(self):
        # The length is only known once everything has been evaluated
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # The new object shares the iterable and the cache with this one
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        # The copy shares the iterable and the cache with this one
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
|
||||
|
||||
|
||||
class PagedList(object):
|
||||
def __len__(self):
|
||||
# This is only useful for tests
|
||||
@@ -3912,7 +4076,8 @@ def escape_rfc3986(s):
|
||||
"""Escape non-ASCII characters as suggested by RFC 3986"""
|
||||
if sys.version_info < (3, 0) and isinstance(s, compat_str):
|
||||
s = s.encode('utf-8')
|
||||
return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
|
||||
# ensure unicode: after quoting, it can always be converted
|
||||
return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
|
||||
|
||||
|
||||
def escape_url(url):
|
||||
@@ -3927,6 +4092,10 @@ def escape_url(url):
|
||||
).geturl()
|
||||
|
||||
|
||||
def parse_qs(url):
    """Return the query of `url` parsed into a dict mapping names to lists of values"""
    query_string = compat_urllib_parse.urlparse(url).query
    return compat_parse_qs(query_string)
|
||||
|
||||
|
||||
def read_batch_urls(batch_fd):
|
||||
def fixup(url):
|
||||
if not isinstance(url, compat_str):
|
||||
@@ -3947,14 +4116,28 @@ def urlencode_postdata(*args, **kargs):
|
||||
return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
|
||||
|
||||
|
||||
def update_url(url, **kwargs):
    """Replace URL components specified by kwargs

    @param url      compat_str or parsed URL tuple
    Keyword arguments are the names of the parsed URL components to replace,
    except that a non-empty `query_update` is merged into the existing query
    instead of replacing it (and overrides any `query`)
    @returns        compat_str
    """
    already_parsed = isinstance(url, tuple)
    if not kwargs:
        # nothing to replace
        return compat_urllib_parse.urlunparse(url) if already_parsed else url

    parts = url if already_parsed else compat_urllib_parse.urlparse(url)
    extra_query = kwargs.pop('query_update', None)
    if extra_query:
        merged = compat_parse_qs(parts.query)
        merged.update(extra_query)
        kwargs['query'] = compat_urllib_parse_urlencode(merged, True)
    return compat_urllib_parse.urlunparse(parts._replace(**compat_kwargs(kwargs)))
|
||||
|
||||
|
||||
def update_url_query(url, query):
    """Return `url` with its query updated from `query`

    `url` is returned unchanged if `query` is empty; otherwise the work is
    delegated to update_url() with `query` passed as its `query_update`.
    """
    if not query:
        return url
    return update_url(url, query_update=query)
|
||||
|
||||
|
||||
def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||
@@ -4029,6 +4212,10 @@ def multipart_encode(data, boundary=None):
|
||||
return out, content_type
|
||||
|
||||
|
||||
def variadic(x, allowed_types=(compat_str, bytes, dict)):
    """Return `x` if it is an iterable, other than one of `allowed_types`, or else (x, )"""
    if not isinstance(x, compat_collections_abc.Iterable):
        return (x,)
    if isinstance(x, allowed_types):
        # iterable, but to be treated as a single value
        return (x,)
    return x
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
if isinstance(key_or_keys, (list, tuple)):
|
||||
for key in key_or_keys:
|
||||
@@ -4039,6 +4226,23 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
return d.get(key_or_keys, default)
|
||||
|
||||
|
||||
def try_call(*funcs, **kwargs):
    """Return the result of the first of `funcs` that can be called successfully

    Keyword arguments:
    @param expected_type    if not None, skip results that are not of this type
    @param args             positional arguments passed to each function
    @param kwargs           keyword arguments passed to each function

    A call counts as failed if it raises AttributeError, KeyError, TypeError,
    IndexError or ZeroDivisionError. None is returned if nothing qualifies.
    """

    # parameter defaults
    expected_type = kwargs.get('expected_type')
    call_args = kwargs.get('args', [])
    call_kwargs = kwargs.get('kwargs', {})

    for func in funcs:
        try:
            outcome = func(*call_args, **call_kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(outcome, expected_type):
            return outcome
    return None
|
||||
|
||||
|
||||
def try_get(src, getter, expected_type=None):
|
||||
if not isinstance(getter, (list, tuple)):
|
||||
getter = [getter]
|
||||
@@ -5401,7 +5605,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
||||
|
||||
if proxy == '__noproxy__':
|
||||
return None # No Proxy
|
||||
if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
|
||||
if compat_urllib_parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
|
||||
req.add_header('Ytdl-socks-proxy', proxy)
|
||||
# youtube-dl's http/https handlers do wrapping the socket with socks
|
||||
return None
|
||||
@@ -5725,7 +5929,7 @@ def write_xattr(path, key, value):
|
||||
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
except EnvironmentError as e:
|
||||
raise XAttrMetadataError(e.errno, e.strerror)
|
||||
stdout, stderr = p.communicate()
|
||||
stdout, stderr = process_communicate_or_kill(p)
|
||||
stderr = stderr.decode('utf-8', 'replace')
|
||||
if p.returncode != 0:
|
||||
raise XAttrMetadataError(p.returncode, stderr)
|
||||
@@ -5772,3 +5976,212 @@ def clean_podcast_url(url):
|
||||
st\.fm # https://podsights.com/docs/
|
||||
)/e
|
||||
)/''', '', url)
|
||||
|
||||
|
||||
def traverse_obj(obj, *paths, **kwargs):
    """
    Safely traverse nested `dict`s and `Sequence`s

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    "value"

    Each of the provided `paths` is tested and the first producing a valid result will be returned.
    The next path will also be tested if the path branched but no results could be found.
    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
    A value of None is treated as the absence of a value.

    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.

    The keys in the path can be one of:
        - `None`:           Return the current object.
        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
        - `slice`:          Branch out and return all values in `obj[key]`.
        - `Ellipsis`:       Branch out and return a list of all values.
        - `tuple`/`list`:   Branch out and return a list of all matching values.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch out and return values filtered by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Sequence`s, `key` is the index of the value.
        - `dict`            Transform the current object and return a matching dict.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

        `tuple`, `list`, and `dict` all support nested paths and branches.

    @params paths           Paths which to traverse by.
    Keyword arguments:
    @param default          Value to return if the paths do not match.
    @param expected_type    If a `type`, only accept final values of this type.
                            If any other callable, try to call the function on each result.
    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
    @param casesense        If `False`, consider string dictionary keys as case insensitive.

    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API

    @param _is_user_input   Whether the keys are generated from user input.
                            If `True` strings get converted to `int`/`slice` if needed.
    @param _traverse_string Whether to traverse into objects as strings.
                            If `True`, any non-compatible object will first be
                            converted into a string and then traversed into.


    @returns                The result of the object traversal.
                            If successful, `get_all=True`, and the path branches at least once,
                            then a list of results is returned instead.
                            A list is always returned if the last path branches and no `default` is given.
    """

    # parameter defaults
    default = kwargs.get('default', NO_DEFAULT)
    expected_type = kwargs.get('expected_type')
    get_all = kwargs.get('get_all', True)
    casesense = kwargs.get('casesense', True)
    _is_user_input = kwargs.get('_is_user_input', False)
    _traverse_string = kwargs.get('_traverse_string', False)

    # instant compat
    str = compat_str

    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
    casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k

    # type_test() maps a result to itself if acceptable, or else to None
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    else:
        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))

    def from_iterable(iterables):
        # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
        for it in iterables:
            for item in it:
                yield item

    def apply_key(key, obj):
        # Generate the value(s) selected from `obj` by a single `key`
        if obj is None:
            return

        elif key is None:
            yield obj

        elif isinstance(key, (list, tuple)):
            for branch in key:
                _, result = apply_path(obj, branch)
                for item in result:
                    yield item

        elif key is Ellipsis:
            result = []
            if isinstance(obj, compat_collections_abc.Mapping):
                result = obj.values()
            elif is_sequence(obj):
                result = obj
            elif isinstance(obj, compat_re_Match):
                result = obj.groups()
            elif _traverse_string:
                result = str(obj)
            for item in result:
                yield item

        elif callable(key):
            if is_sequence(obj):
                iter_obj = enumerate(obj)
            elif isinstance(obj, compat_collections_abc.Mapping):
                iter_obj = obj.items()
            elif isinstance(obj, compat_re_Match):
                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
            elif _traverse_string:
                iter_obj = enumerate(str(obj))
            else:
                return
            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
                yield item

        elif isinstance(key, dict):
            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
            yield dict((k, v if v is not None else default) for k, v in iter_obj
                       if v is not None or default is not NO_DEFAULT)

        elif isinstance(obj, compat_collections_abc.Mapping):
            # An unhashable key (eg, a slice before Python 3.12) makes a dict
            # lookup raise TypeError: try_call() turns that into "no value"
            yield (try_call(obj.get, args=(key,))
                   if casesense or try_call(obj.__contains__, args=(key,))
                   else next((v for k, v in obj.items() if casefold(k) == key), None))

        elif isinstance(obj, compat_re_Match):
            if isinstance(key, int) or casesense:
                try:
                    yield obj.group(key)
                    return
                except IndexError:
                    pass
            if not isinstance(key, str):
                return

            # case-insensitive lookup among the named groups
            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)

        else:
            if _is_user_input:
                key = (int_or_none(key) if ':' not in key
                       else slice(*map(int_or_none, key.split(':'))))

            if not isinstance(key, (int, slice)):
                return

            if not is_sequence(obj):
                if not _traverse_string:
                    return
                obj = str(obj)

            try:
                yield obj[key]
            except IndexError:
                pass

    def apply_path(start_obj, path):
        # Apply each key of `path` in turn to all the objects found so far
        objs = (start_obj,)
        has_branched = False

        for key in variadic(path):
            if _is_user_input and key == ':':
                key = Ellipsis

            if not casesense and isinstance(key, str):
                key = compat_casefold(key)

            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
                has_branched = True

            key_func = functools.partial(apply_key, key)
            objs = from_iterable(map(key_func, objs))

        return has_branched, objs

    def _traverse_obj(obj, path, use_list=True):
        has_branched, results = apply_path(obj, path)
        results = LazyList(x for x in map(type_test, results) if x is not None)

        if get_all and has_branched:
            return results.exhaust() if results or use_list else None

        return results[0] if results else None

    for index, path in enumerate(paths, 1):
        # only the last path may give an empty list, and only if there is no default
        use_list = default is NO_DEFAULT and index == len(paths)
        result = _traverse_obj(obj, path, use_list)
        if result is not None:
            return result

    return None if default is NO_DEFAULT else default
|
||||
|
||||
|
||||
def get_first(obj, keys, **kwargs):
    """Return the first result of traversing every value of `obj` by `keys`

    Any keyword arguments are passed on to traverse_obj().
    """
    path = (Ellipsis,) + tuple(variadic(keys))
    return traverse_obj(obj, path, get_all=False, **kwargs)
|
||||
|
||||
|
||||
def join_nonempty(*values, **kwargs):
    """Join the truthy items of `values`, converted to strings

    Keyword arguments:
    @param delim        the separator (default '-')
    @param from_dict    if not None, each value is a path to be looked up
                        in this object with traverse_obj()
    """

    # parameter defaults
    delim = kwargs.get('delim', '-')
    from_dict = kwargs.get('from_dict')

    if from_dict is not None:
        values = (traverse_obj(from_dict, variadic(v)) for v in values)
    return delim.join([compat_str(v) for v in values if v])
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.06.06'
|
||||
__version__ = '2021.12.17'
|
||||
|
Reference in New Issue
Block a user