mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-14 20:28:36 +09:00
Compare commits
28 Commits
2019.03.01
...
2019.03.09
Author | SHA1 | Date | |
---|---|---|---|
![]() |
10734553fe | ||
![]() |
bba35695eb | ||
![]() |
9d74ea6d36 | ||
![]() |
7b6e760870 | ||
![]() |
829685b88a | ||
![]() |
fca9baf0da | ||
![]() |
d347b52b63 | ||
![]() |
97157c692c | ||
![]() |
a551768acf | ||
![]() |
ee0ba927aa | ||
![]() |
399f76870d | ||
![]() |
e5ada4f3ad | ||
![]() |
bb6f112d9d | ||
![]() |
c17eb5b4b0 | ||
![]() |
d9eb580a79 | ||
![]() |
5dcd630dca | ||
![]() |
c790e93ab5 | ||
![]() |
39c780fdec | ||
![]() |
e7e62441cd | ||
![]() |
0a5baf9c21 | ||
![]() |
8ae113ca9d | ||
![]() |
7aeb788e56 | ||
![]() |
7465e0aee2 | ||
![]() |
a8f83f0c56 | ||
![]() |
dca0e0040a | ||
![]() |
398e1e21d6 | ||
![]() |
c5b02efe20 | ||
![]() |
06242d44fe |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.09**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.03.01
|
||||
[debug] youtube-dl version 2019.03.09
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -9,7 +9,6 @@ python:
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
sudo: false
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
|
31
ChangeLog
31
ChangeLog
@@ -1,3 +1,34 @@
|
||||
version 2019.03.09
|
||||
|
||||
Core
|
||||
* [extractor/common] Use compat_etree_Element
|
||||
+ [compat] Introduce compat_etree_Element
|
||||
* [extractor/common] Fallback url to base URL for DASH formats
|
||||
* [extractor/common] Do not fail on invalid data while parsing F4M manifest
|
||||
in non-fatal mode
|
||||
* [extractor/common] Return MPD manifest as format's url meta field (#20242)
|
||||
* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
|
||||
|
||||
Extractors
|
||||
* [francetv:site] Relax video id regular expression (#20268)
|
||||
* [toutv] Detect invalid login error
|
||||
* [toutv] Fix authentication (#20261)
|
||||
+ [urplay] Extract timestamp (#20235)
|
||||
+ [openload] Add support for oload.space (#20246)
|
||||
* [facebook] Improve uploader extraction (#20250)
|
||||
* [bbc] Use compat_etree_Element
|
||||
* [crunchyroll] Use compat_etree_Element
|
||||
* [npo] Improve ISM extraction
|
||||
* [rai] Improve extraction (#20253)
|
||||
* [paramountnetwork] Fix mgid extraction (#20241)
|
||||
* [libsyn] Improve extraction (#20229)
|
||||
+ [youtube] Add more invidious instances to URL regular expression (#20228)
|
||||
* [spankbang] Fix extraction (#20023)
|
||||
* [espn] Extend URL regular expression (#20013)
|
||||
* [sixplay] Handle videos with empty assets (#20016)
|
||||
+ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
|
||||
|
||||
|
||||
version 2019.03.01
|
||||
|
||||
Core
|
||||
|
@@ -29,6 +29,16 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||
tf.close()
|
||||
os.remove(tf.name)
|
||||
|
||||
def test_strip_httponly_prefix(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
def assert_cookie_has_value(key):
|
||||
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
|
||||
|
||||
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from youtube_dl.compat import (
|
||||
compat_getenv,
|
||||
compat_setenv,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_shlex_split,
|
||||
@@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase):
|
||||
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
|
||||
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
|
||||
|
||||
def test_compat_etree_Element(self):
|
||||
try:
|
||||
compat_etree_Element.items
|
||||
except AttributeError:
|
||||
self.fail('compat_etree_Element is not a type')
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
<root foo="bar" spam="中文">
|
||||
|
6
test/testdata/cookies/httponly_cookies.txt
vendored
Normal file
6
test/testdata/cookies/httponly_cookies.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
|
||||
www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
|
@@ -2508,6 +2508,15 @@ class _TreeBuilder(etree.TreeBuilder):
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||
# the following will crash with:
|
||||
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||
isinstance(None, xml.etree.ElementTree.Element)
|
||||
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||
except TypeError: # Python <=2.6
|
||||
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
@@ -2969,6 +2978,7 @@ __all__ = [
|
||||
'compat_cookiejar',
|
||||
'compat_cookies',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
'compat_etree_fromstring',
|
||||
'compat_etree_register_namespace',
|
||||
'compat_expanduser',
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
import xml
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -23,6 +22,7 @@ from ..utils import (
|
||||
urljoin,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_Element,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -317,7 +317,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
continue
|
||||
captions = self._download_xml(
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, xml.etree.ElementTree.Element):
|
||||
if not isinstance(captions, compat_etree_Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
|
@@ -17,6 +17,7 @@ import math
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookies,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_integer_types,
|
||||
@@ -102,10 +103,23 @@ class InfoExtractor(object):
|
||||
from worst to best quality.
|
||||
|
||||
Potential fields:
|
||||
* url Mandatory. The URL of the video file
|
||||
* url The mandatory URL representing the media:
|
||||
for plain file media - HTTP URL of this file,
|
||||
for RTMP - RTMP URL,
|
||||
for HLS - URL of the M3U8 media playlist,
|
||||
for HDS - URL of the F4M manifest,
|
||||
for DASH - URL of the MPD manifest or
|
||||
base URL representing the media
|
||||
if MPD manifest is parsed from
|
||||
a string,
|
||||
for MSS - URL of the ISM manifest.
|
||||
* manifest_url
|
||||
The URL of the manifest file in case of
|
||||
fragmented media (DASH, hls, hds)
|
||||
fragmented media:
|
||||
for HLS - URL of the M3U8 master playlist,
|
||||
for HDS - URL of the F4M manifest,
|
||||
for DASH - URL of the MPD manifest,
|
||||
for MSS - URL of the ISM manifest.
|
||||
* ext Will be calculated from URL if missing
|
||||
* format A human-readable description of the format
|
||||
("mp4 container with h264/opus").
|
||||
@@ -788,7 +802,7 @@ class InfoExtractor(object):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
|
||||
Return a tuple (xml as a compat_etree_Element, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
@@ -809,7 +823,7 @@ class InfoExtractor(object):
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the xml as an xml.etree.ElementTree.Element.
|
||||
Return the xml as a compat_etree_Element.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
@@ -1454,6 +1468,9 @@ class InfoExtractor(object):
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True, m3u8_id=None):
|
||||
if not isinstance(manifest, compat_etree_Element) and not fatal:
|
||||
return []
|
||||
|
||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||
@@ -2120,7 +2137,8 @@ class InfoExtractor(object):
|
||||
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
|
||||
f = {
|
||||
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
|
||||
'url': base_url,
|
||||
# NB: mpd_url may be empty when MPD manifest is parsed from a string
|
||||
'url': mpd_url or base_url,
|
||||
'manifest_url': mpd_url,
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree as etree
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
@@ -12,6 +11,7 @@ from .common import InfoExtractor
|
||||
from .vrv import VRVIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
@@ -56,22 +56,11 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
if username is None:
|
||||
return
|
||||
|
||||
self._download_webpage(
|
||||
'https://www.crunchyroll.com/?a=formhandler',
|
||||
None, 'Logging in', 'Wrong login info',
|
||||
data=urlencode_postdata({
|
||||
'formname': 'RpcApiUser_Login',
|
||||
'next_url': 'https://www.crunchyroll.com/acct/membership',
|
||||
'name': username,
|
||||
'password': password,
|
||||
}))
|
||||
|
||||
'''
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
def is_logged(webpage):
|
||||
return '<title>Redirecting' in webpage
|
||||
return 'href="/logout"' in webpage
|
||||
|
||||
# Already logged in
|
||||
if is_logged(login_page):
|
||||
@@ -110,7 +99,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
'''
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -402,7 +390,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'Downloading subtitles for ' + sub_name, data={
|
||||
'subtitle_script_id': sub_id,
|
||||
})
|
||||
if not isinstance(sub_doc, etree.Element):
|
||||
if not isinstance(sub_doc, compat_etree_Element):
|
||||
continue
|
||||
sid = sub_doc.get('id')
|
||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||
@@ -519,7 +507,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if isinstance(streamdata, etree.Element):
|
||||
if isinstance(streamdata, compat_etree_Element):
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
@@ -530,7 +518,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if isinstance(stream_info, etree.Element):
|
||||
if isinstance(stream_info, compat_etree_Element):
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
@@ -605,7 +593,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
|
||||
if isinstance(metadata, etree.Element):
|
||||
if isinstance(metadata, compat_etree_Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
|
@@ -29,7 +29,8 @@ class ESPNIE(OnceIE):
|
||||
(?:
|
||||
.*?\?.*?\bid=|
|
||||
/_/id/
|
||||
)
|
||||
)|
|
||||
[^/]+/video/
|
||||
)
|
||||
)|
|
||||
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
|
||||
@@ -94,6 +95,9 @@ class ESPNIE(OnceIE):
|
||||
}, {
|
||||
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -424,7 +424,7 @@ class FacebookIE(InfoExtractor):
|
||||
uploader = clean_html(get_element_by_id(
|
||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
||||
fatal=False) or self._og_search_title(webpage, fatal=False)
|
||||
default=None) or self._og_search_title(webpage, fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
@@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': '162311093',
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
|
||||
@@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
|
||||
catalogue = None
|
||||
video_id = self._search_regex(
|
||||
r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
|
||||
webpage, 'video id', default=None, group='id')
|
||||
|
||||
if not video_id:
|
||||
|
@@ -1,12 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
parse_duration,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor):
|
||||
'id': '6385796',
|
||||
'ext': 'mp3',
|
||||
'title': "Champion Minded - Developing a Growth Mindset",
|
||||
'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
||||
# description fetched using another request:
|
||||
# http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
|
||||
# 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
|
||||
'upload_date': '20180320',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
},
|
||||
@@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
url = m.group('mainurl')
|
||||
url, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
podcast_title = self._search_regex(
|
||||
r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
|
||||
if podcast_title:
|
||||
podcast_title = podcast_title.strip()
|
||||
episode_title = self._search_regex(
|
||||
r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
|
||||
if episode_title:
|
||||
episode_title = episode_title.strip()
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'var\s+playlistItem\s*=\s*({.+?});',
|
||||
webpage, 'JSON data block'), video_id)
|
||||
|
||||
episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
|
||||
if not episode_title:
|
||||
self._search_regex(
|
||||
[r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
|
||||
webpage, 'episode title')
|
||||
episode_title = episode_title.strip()
|
||||
|
||||
podcast_title = strip_or_none(clean_html(self._search_regex(
|
||||
r'<h3>([^<]+)</h3>', webpage, 'podcast title',
|
||||
default=None) or get_element_by_class('podcast-title', webpage)))
|
||||
|
||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||
|
||||
formats = []
|
||||
for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
|
||||
f_url = data.get(k)
|
||||
if not f_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
|
||||
'description', default=None)
|
||||
@@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor):
|
||||
# Strip non-breaking and normal spaces
|
||||
description = description.replace('\u00A0', ' ').strip()
|
||||
release_date = unified_strdate(self._search_regex(
|
||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||
|
||||
data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
|
||||
data = json.loads(data_json)
|
||||
|
||||
formats = [{
|
||||
'url': data['media_url'],
|
||||
'format_id': 'main',
|
||||
}, {
|
||||
'url': data['media_url_libsyn'],
|
||||
'format_id': 'libsyn',
|
||||
}]
|
||||
thumbnail = data.get('thumbnail_url')
|
||||
duration = parse_duration(data.get('duration'))
|
||||
r'<div class="release_date">Released: ([^<]+)<',
|
||||
webpage, 'release date', default=None) or data.get('release_date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnail': data.get('thumbnail_url'),
|
||||
'upload_date': release_date,
|
||||
'duration': duration,
|
||||
'duration': parse_duration(data.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -238,7 +238,7 @@ class NPOIE(NPOBaseIE):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif '.ism/Manifest' in stream_url:
|
||||
elif re.search(r'\.isml?/Manifest', stream_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
stream_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
|
@@ -249,7 +249,7 @@ class OpenloadIE(InfoExtractor):
|
||||
(?:www\.)?
|
||||
(?:
|
||||
openload\.(?:co|io|link|pw)|
|
||||
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live)
|
||||
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space)
|
||||
)
|
||||
)/
|
||||
(?:f|embed)/
|
||||
@@ -349,6 +349,9 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://oload.live/f/-Z58UZ-GR4M',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.space/f/IY4eZSst3u8/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
|
@@ -74,11 +74,11 @@ class RaiBaseIE(InfoExtractor):
|
||||
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
|
||||
continue
|
||||
|
||||
if ext == 'm3u8':
|
||||
if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
elif ext == 'f4m' or platform == 'flash':
|
||||
manifest_url = update_url_query(
|
||||
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
|
||||
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
|
||||
|
@@ -61,7 +61,8 @@ class SixPlayIE(InfoExtractor):
|
||||
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for asset in clip_data['assets']:
|
||||
assets = clip_data.get('assets') or []
|
||||
for asset in assets:
|
||||
asset_url = asset.get('full_physical_path')
|
||||
protocol = asset.get('protocol')
|
||||
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
||||
|
@@ -9,6 +9,8 @@ from ..utils import (
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -64,16 +66,49 @@ class SpankBangIE(InfoExtractor):
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for mobj in re.finditer(
|
||||
r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage):
|
||||
format_id, format_url = mobj.group('id', 'url')
|
||||
|
||||
def extract_format(format_id, format_url):
|
||||
f_url = url_or_none(format_url)
|
||||
if not f_url:
|
||||
return
|
||||
f = parse_resolution(format_id)
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'url': f_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
STREAM_URL_PREFIX = 'stream_url_'
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
|
||||
% STREAM_URL_PREFIX, webpage):
|
||||
extract_format(mobj.group('id', 'url'))
|
||||
|
||||
if not formats:
|
||||
stream_key = self._search_regex(
|
||||
r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, 'stream key', group='value')
|
||||
|
||||
sb_csrf_session = self._get_cookies(
|
||||
'https://spankbang.com')['sb_csrf_session'].value
|
||||
|
||||
stream = self._download_json(
|
||||
'https://spankbang.com/api/videos/stream', video_id,
|
||||
'Downloading stream JSON', data=urlencode_postdata({
|
||||
'id': stream_key,
|
||||
'data': 0,
|
||||
'sb_csrf_session': sb_csrf_session,
|
||||
}), headers={
|
||||
'Referer': url,
|
||||
'X-CSRFToken': sb_csrf_session,
|
||||
})
|
||||
|
||||
for format_id, format_url in stream.items():
|
||||
if format_id.startswith(STREAM_URL_PREFIX):
|
||||
extract_format(
|
||||
format_id[len(STREAM_URL_PREFIX):], format_url)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_regex(
|
||||
|
@@ -46,8 +46,12 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
cs = self._parse_json(self._search_regex(
|
||||
root_data = self._parse_json(self._search_regex(
|
||||
r'window\.__DATA__\s*=\s*({.+})',
|
||||
webpage, 'data'), None)['children']
|
||||
c = next(c for c in cs if c.get('type') == 'VideoPlayer')
|
||||
webpage, 'data'), None)
|
||||
|
||||
def find_sub_data(data, data_type):
|
||||
return next(c for c in data['children'] if c.get('type') == data_type)
|
||||
|
||||
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
|
||||
return c['props']['media']['video']['config']['uri']
|
||||
|
@@ -1,14 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .radiocanada import RadioCanadaIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -38,47 +38,30 @@ class TouTvIE(RadioCanadaIE):
|
||||
'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36'
|
||||
|
||||
def _real_initialize(self):
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
login_webpage = self._download_webpage(
|
||||
'https://services.radio-canada.ca/auth/oauth/v2/authorize',
|
||||
None, 'Downloading login page', query={
|
||||
'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36',
|
||||
'redirect_uri': 'https://ici.tou.tv/logincallback',
|
||||
'response_type': 'token',
|
||||
'scope': 'id.write media-validation.read',
|
||||
'state': '/',
|
||||
})
|
||||
|
||||
def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
|
||||
form, form_elem = re.search(
|
||||
r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups()
|
||||
form_data = self._hidden_inputs(form)
|
||||
form_url = extract_attributes(form_elem).get('action') or default_form_url
|
||||
return form_url, form_data
|
||||
|
||||
post_url, form_data = extract_form_url_and_data(
|
||||
login_webpage,
|
||||
'https://services.radio-canada.ca/auth/oauth/v2/authorize/login',
|
||||
r'(?:id|name)="Form-login"')
|
||||
form_data.update({
|
||||
'login-email': email,
|
||||
'login-password': password,
|
||||
})
|
||||
consent_webpage = self._download_webpage(
|
||||
post_url, None, 'Logging in', data=urlencode_postdata(form_data))
|
||||
post_url, form_data = extract_form_url_and_data(
|
||||
consent_webpage,
|
||||
'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent')
|
||||
_, urlh = self._download_webpage_handle(
|
||||
post_url, None, 'Following Redirection',
|
||||
data=urlencode_postdata(form_data))
|
||||
self._access_token = self._search_regex(
|
||||
r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
urlh.geturl(), 'access token')
|
||||
try:
|
||||
self._access_token = self._download_json(
|
||||
'https://services.radio-canada.ca/toutv/profiling/accounts/login',
|
||||
None, 'Logging in', data=json.dumps({
|
||||
'ClientId': self._CLIENT_KEY,
|
||||
'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
|
||||
'Email': email,
|
||||
'Password': password,
|
||||
'Scope': 'id.write media-validation.read',
|
||||
}).encode(), headers={
|
||||
'Authorization': 'client-key ' + self._CLIENT_KEY,
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
})['access_token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
error = self._parse_json(e.cause.read().decode(), None)['Message']
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
self._claims = self._call_api('validation/v2/getClaims')['claims']
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -2,18 +2,31 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class URPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
|
||||
'md5': 'ad5f0de86f16ca4c8062cd103959a9eb',
|
||||
'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
|
||||
'md5': 'ff5b0c89928f8083c74bbd5099c9292d',
|
||||
'info_dict': {
|
||||
'id': '203704',
|
||||
'ext': 'mp4',
|
||||
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
|
||||
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
|
||||
'timestamp': 1513512768,
|
||||
'upload_date': '20171217',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||
'info_dict': {
|
||||
'id': '190031',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tripp, Trapp, Träd : Sovkudde',
|
||||
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
|
||||
'timestamp': 1440093600,
|
||||
'upload_date': '20150820',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
|
||||
@@ -51,6 +64,7 @@ class URPlayIE(InfoExtractor):
|
||||
'title': urplayer_data['title'],
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': urplayer_data.get('image'),
|
||||
'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')),
|
||||
'series': urplayer_data.get('series_title'),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
|
@@ -502,7 +502,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
orig_url = url
|
||||
if mobj.group('pro') or mobj.group('player'):
|
||||
if mobj.group('pro'):
|
||||
# some videos require portfolio_id to be present in player url
|
||||
# https://github.com/rg3/youtube-dl/issues/20070
|
||||
url = self._extract_url(url, self._download_webpage(url, video_id))
|
||||
elif mobj.group('player'):
|
||||
url = 'https://player.vimeo.com/video/' + video_id
|
||||
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
|
||||
url = 'https://vimeo.com/' + video_id
|
||||
|
@@ -352,6 +352,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?:www\.)?yourepeat\.com/|
|
||||
tube\.majestyc\.net/|
|
||||
(?:www\.)?invidio\.us/|
|
||||
(?:www\.)?invidious\.snopyta\.org/|
|
||||
(?:www\.)?invidious\.kabi\.tk/|
|
||||
(?:www\.)?vid\.wxzm\.sx/|
|
||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||
(?: # the various things that can precede the ID:
|
||||
|
@@ -1141,6 +1141,8 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
|
||||
|
||||
|
||||
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
||||
_HTTPONLY_PREFIX = '#HttpOnly_'
|
||||
|
||||
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||
# Store session cookies with `expires` set to 0 instead of an empty
|
||||
# string
|
||||
@@ -1150,7 +1152,21 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
||||
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
|
||||
|
||||
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||
compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires)
|
||||
"""Load cookies from a file."""
|
||||
if filename is None:
|
||||
if self.filename is not None:
|
||||
filename = self.filename
|
||||
else:
|
||||
raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
|
||||
|
||||
cf = io.StringIO()
|
||||
with open(filename) as f:
|
||||
for line in f:
|
||||
if line.startswith(self._HTTPONLY_PREFIX):
|
||||
line = line[len(self._HTTPONLY_PREFIX):]
|
||||
cf.write(compat_str(line))
|
||||
cf.seek(0)
|
||||
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
||||
# Session cookies are denoted by either `expires` field set to
|
||||
# an empty string or 0. MozillaCookieJar only recognizes the former
|
||||
# (see [1]). So we need to force the latter to be recognized as session
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2019.03.01'
|
||||
__version__ = '2019.03.09'
|
||||
|
Reference in New Issue
Block a user