mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-19 22:58:37 +09:00
Compare commits
112 Commits
2018.06.04
...
2018.07.21
Author | SHA1 | Date | |
---|---|---|---|
![]() |
8e66ffc3b7 | ||
![]() |
6f27998e75 | ||
![]() |
3052a30d42 | ||
![]() |
4ecf300d13 | ||
![]() |
af03000ad5 | ||
![]() |
b96b4be461 | ||
![]() |
edb0e17188 | ||
![]() |
e9c671d5e8 | ||
![]() |
fd62b36680 | ||
![]() |
25586c601c | ||
![]() |
ecb6b6ae2d | ||
![]() |
c258570edd | ||
![]() |
6fc09f0155 | ||
![]() |
11330f5121 | ||
![]() |
8da17f9680 | ||
![]() |
c63f5fb863 | ||
![]() |
38f1eb0ac3 | ||
![]() |
371dcc1dd4 | ||
![]() |
bd21ead2a2 | ||
![]() |
905eef2b06 | ||
![]() |
79367a9820 | ||
![]() |
40a051fa9f | ||
![]() |
7e8e948cf7 | ||
![]() |
4b3ee09886 | ||
![]() |
79fd7320e2 | ||
![]() |
0685d9727b | ||
![]() |
e06632e3fe | ||
![]() |
69fcdb845b | ||
![]() |
6868d272e5 | ||
![]() |
4742150788 | ||
![]() |
4e71dfd819 | ||
![]() |
1ed0b2f74d | ||
![]() |
e15141adae | ||
![]() |
94fef94d9c | ||
![]() |
9a6628aaf9 | ||
![]() |
689af4960e | ||
![]() |
d5de0f21b9 | ||
![]() |
24d26ab380 | ||
![]() |
836ef4840f | ||
![]() |
5621c3222e | ||
![]() |
db5debf313 | ||
![]() |
8cee692b8b | ||
![]() |
973b6ceebb | ||
![]() |
eca1f0d115 | ||
![]() |
2160768a21 | ||
![]() |
267d81962a | ||
![]() |
9cf648c92b | ||
![]() |
5e8e2fa51f | ||
![]() |
d4a24f4091 | ||
![]() |
acbd0ff5df | ||
![]() |
7b393f9cc5 | ||
![]() |
c3bcd206eb | ||
![]() |
1f6cc5807e | ||
![]() |
c306f076ec | ||
![]() |
a0949fec08 | ||
![]() |
74caf528bc | ||
![]() |
9fb62e35f6 | ||
![]() |
b71cc71910 | ||
![]() |
a4ec45179e | ||
![]() |
30374f4d40 | ||
![]() |
91aa502d91 | ||
![]() |
f51f526b0a | ||
![]() |
c9b983ff82 | ||
![]() |
e730508827 | ||
![]() |
8b4b400aef | ||
![]() |
e12b4b8bcc | ||
![]() |
18806e3b6b | ||
![]() |
713afa705c | ||
![]() |
721a877d2f | ||
![]() |
9283d4ea03 | ||
![]() |
00a429bea3 | ||
![]() |
d391b7e23d | ||
![]() |
075a13d3e9 | ||
![]() |
8ba84e4600 | ||
![]() |
858cf4dc29 | ||
![]() |
9e761fe6f5 | ||
![]() |
ce0edda0f9 | ||
![]() |
0adf213d8c | ||
![]() |
8b183bd5f8 | ||
![]() |
1882511754 | ||
![]() |
764cd4e6f3 | ||
![]() |
734d461ca0 | ||
![]() |
81c5df4f2c | ||
![]() |
87f89dacdd | ||
![]() |
9b0b627534 | ||
![]() |
61cb66830f | ||
![]() |
c797db4a2f | ||
![]() |
03eef0f032 | ||
![]() |
aa56061627 | ||
![]() |
18d66f0410 | ||
![]() |
f15f7a674b | ||
![]() |
9aca7fe6a3 | ||
![]() |
e0671819e7 | ||
![]() |
5d6c81b63f | ||
![]() |
dc53c78634 | ||
![]() |
7dc9c60b4b | ||
![]() |
e51752754d | ||
![]() |
0645be49cb | ||
![]() |
a572ae6114 | ||
![]() |
b2df66aeca | ||
![]() |
93cffb1444 | ||
![]() |
d253df2f65 | ||
![]() |
e8c6afc168 | ||
![]() |
cc37cc3f99 | ||
![]() |
9d581efe05 | ||
![]() |
ff2e486221 | ||
![]() |
6ae36035d9 | ||
![]() |
9afd74d705 | ||
![]() |
2e6975306a | ||
![]() |
06ea7bdd99 | ||
![]() |
d7be705308 | ||
![]() |
2e190c2ad9 |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.06.04*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.06.04**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.07.21*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.07.21**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.06.04
|
||||
[debug] youtube-dl version 2018.07.21
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
136
ChangeLog
136
ChangeLog
@@ -1,3 +1,139 @@
|
||||
version 2018.07.21
|
||||
|
||||
Core
|
||||
+ [utils] Introduce url_or_none
|
||||
* [utils] Allow JSONP without function name (#17028)
|
||||
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for BBC Radio Play pages (#17022)
|
||||
* [iwara] Fix download URLs (#17026)
|
||||
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
|
||||
+ [viu] Pass Referer and Origin headers and area id (#16992)
|
||||
+ [vimeo] Add another config regular expression (#17013)
|
||||
+ [facebook] Extract view count (#16942)
|
||||
* [dailymotion] Improve description extraction (#16984)
|
||||
* [slutload] Fix and improve extraction (#17001)
|
||||
* [mediaset] Fix extraction (#16977)
|
||||
+ [theplatform] Add support for theplatform TLD customization (#16977)
|
||||
* [imgur] Relax URL regular expression (#16987)
|
||||
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
|
||||
#16959)
|
||||
|
||||
|
||||
version 2018.07.10
|
||||
|
||||
Core
|
||||
* [utils] Share JSON-LD regular expression
|
||||
* [downloader/dash] Improve error handling (#16927)
|
||||
|
||||
Extractors
|
||||
+ [nrktv] Add support for new season and serie URL schema
|
||||
+ [nrktv] Add support for new episode URL schema (#16909)
|
||||
+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
|
||||
* [funk] Fix extraction (#16918)
|
||||
* [watchbox] Fix extraction (#16904)
|
||||
* [dplayit] Sort formats
|
||||
* [dplayit] Fix extraction (#16901)
|
||||
* [youtube] Improve login error handling (#13822)
|
||||
|
||||
|
||||
version 2018.07.04
|
||||
|
||||
Core
|
||||
* [extractor/common] Properly escape % in MPD templates (#16867)
|
||||
* [extractor/common] Use source URL as Referer for HTML5 entries (16849)
|
||||
* Prefer ffmpeg over avconv by default (#8622)
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
|
||||
* [lynda] Simplify login and improve error capturing (#16891)
|
||||
+ [go90] Add support for embed URLs (#16873)
|
||||
* [go90] Detect geo restriction error and pass geo verification headers
|
||||
(#16874)
|
||||
* [vlive] Fix live streams extraction (#16871)
|
||||
* [npo] Fix typo (#16872)
|
||||
+ [mediaset] Add support for new videos and extract all formats (#16568)
|
||||
* [dctptv] Restore extraction based on REST API (#16850)
|
||||
* [svt] Improve extraction and add support for pages (#16802)
|
||||
* [porncom] Fix extraction (#16808)
|
||||
|
||||
|
||||
version 2018.06.25
|
||||
|
||||
Extractors
|
||||
* [joj] Relax URL regular expression (#16771)
|
||||
* [brightcove] Workaround sonyliv DRM protected videos (#16807)
|
||||
* [motherless] Fix extraction (#16786)
|
||||
* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
|
||||
- [foxnews:insider] Remove extractor (#15810)
|
||||
+ [foxnews] Add support for iframe embeds (#15810, #16711)
|
||||
|
||||
|
||||
version 2018.06.19
|
||||
|
||||
Core
|
||||
+ [extractor/common] Introduce expected_status in _download_* methods
|
||||
for convenient accept of HTTP requests failed with non 2xx status codes
|
||||
+ [compat] Introduce compat_integer_types
|
||||
|
||||
Extractors
|
||||
* [peertube] Improve generic support (#16733)
|
||||
+ [6play] Use geo verification headers
|
||||
* [rtbf] Fix extraction for python 3.2
|
||||
* [vgtv] Improve HLS formats extraction
|
||||
+ [vgtv] Add support for www.aftonbladet.se/tv URLs
|
||||
* [bbccouk] Use expected_status
|
||||
* [markiza] Expect 500 HTTP status code
|
||||
* [tvnow] Try all clear manifest URLs (#15361)
|
||||
|
||||
|
||||
version 2018.06.18
|
||||
|
||||
Core
|
||||
* [downloader/rtmp] Fix downloading in verbose mode (#16736)
|
||||
|
||||
Extractors
|
||||
+ [markiza] Add support for markiza.sk (#16750)
|
||||
* [wat] Try all supported adaptive URLs
|
||||
+ [6play] Add support for rtlplay.be and extract hd usp formats
|
||||
+ [rtbf] Add support for audio and live streams (#9638, #11923)
|
||||
+ [rtbf] Extract HLS, DASH and all HTTP formats
|
||||
+ [rtbf] Extract subtitles
|
||||
+ [rtbf] Fixup specific HTTP URLs (#16101)
|
||||
+ [expressen] Add support for expressen.se
|
||||
* [vidzi] Fix extraction (#16678)
|
||||
* [pbs] Improve extraction (#16623, #16684)
|
||||
* [bilibili] Restrict cid regular expression (#16638, #16734)
|
||||
|
||||
|
||||
version 2018.06.14
|
||||
|
||||
Core
|
||||
* [downloader/http] Fix retry on error when streaming to stdout (#16699)
|
||||
|
||||
Extractors
|
||||
+ [discoverynetworks] Add support for disco-api videos (#16724)
|
||||
+ [dailymotion] Add support for password protected videos (#9789)
|
||||
+ [abc:iview] Add support for livestreams (#12354)
|
||||
* [abc:iview] Fix extraction (#16704)
|
||||
+ [crackle] Add support for sonycrackle.com (#16698)
|
||||
+ [tvnet] Add support for tvnet.gov.vn (#15462)
|
||||
* [nrk] Update API hosts and try all previously known ones (#16690)
|
||||
* [wimp] Fix Youtube embeds extraction
|
||||
|
||||
|
||||
version 2018.06.11
|
||||
|
||||
Extractors
|
||||
* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
|
||||
+ [inc] Add support for another embed schema (#16666)
|
||||
* [tv4] Fix format extraction (#16650)
|
||||
+ [nexx] Add support for free cdn (#16538)
|
||||
+ [pbs] Add another cove id pattern (#15373)
|
||||
+ [rbmaradio] Add support for 192k format (#16631)
|
||||
|
||||
|
||||
version 2018.06.04
|
||||
|
||||
Extractors
|
||||
|
10
README.md
10
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
To install it right away for all UNIX users (Linux, macOS, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
@@ -35,7 +35,7 @@ You can also use pip:
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
@@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors (default)
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
@@ -442,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
|
@@ -266,6 +266,7 @@
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
@@ -289,7 +290,6 @@
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-what**
|
||||
- **FranceCulture**
|
||||
@@ -302,6 +302,9 @@
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **Funimation**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
@@ -455,6 +458,8 @@
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **Markiza**
|
||||
- **MarkizaPage**
|
||||
- **massengeschmack.tv**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
@@ -587,7 +592,9 @@
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisode**
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeason**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@@ -811,6 +818,7 @@
|
||||
- **StretchInternet**
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPage**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **SWRMediathek**
|
||||
@@ -893,6 +901,7 @@
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVN24**
|
||||
- **TVNet**
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
|
@@ -78,6 +78,7 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
base_url,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
@@ -507,6 +508,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
|
||||
def test_url_or_none(self):
|
||||
self.assertEqual(url_or_none(None), None)
|
||||
self.assertEqual(url_or_none(''), None)
|
||||
self.assertEqual(url_or_none('foo'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
@@ -717,6 +728,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
|
@@ -305,8 +305,8 @@ class YoutubeDL(object):
|
||||
http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
|
||||
otherwise prefer ffmpeg.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
|
||||
|
@@ -2787,6 +2787,12 @@ except NameError: # Python 3
|
||||
compat_numeric_types = (int, float, complex)
|
||||
|
||||
|
||||
try:
|
||||
compat_integer_types = (int, long)
|
||||
except NameError: # Python 3
|
||||
compat_integer_types = (int, )
|
||||
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||
host, port = address
|
||||
@@ -2974,6 +2980,7 @@ __all__ = [
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_input',
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_numeric_types',
|
||||
|
@@ -2,7 +2,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import urljoin
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has own retry settings
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
|
@@ -217,10 +217,11 @@ class HttpFD(FileDownloader):
|
||||
before = start # start measuring
|
||||
|
||||
def retry(e):
|
||||
if ctx.tmpfilename != '-':
|
||||
to_stdout = ctx.tmpfilename == '-'
|
||||
if not to_stdout:
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
|
@@ -24,13 +24,12 @@ class RtmpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
def run_rtmpdump(args):
|
||||
start = time.time()
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
|
||||
def dl():
|
||||
resume_percent = None
|
||||
resume_downloaded_data_len = None
|
||||
proc_stderr_closed = False
|
||||
proc_stderr_closed = False
|
||||
try:
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = ''
|
||||
@@ -90,12 +89,8 @@ class RtmpFD(FileDownloader):
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen('[rtmpdump] ' + line)
|
||||
|
||||
try:
|
||||
dl()
|
||||
finally:
|
||||
proc.wait()
|
||||
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
|
@@ -105,22 +105,22 @@ class ABCIE(InfoExtractor):
|
||||
|
||||
class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
|
||||
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'ZY9247A021S00',
|
||||
'id': 'ZX9371A050S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Gaston's Visit",
|
||||
'title': "Gaston's Birthday",
|
||||
'series': "Ben And Holly's Little Kingdom",
|
||||
'description': 'md5:18db170ad71cf161e006a4c688e33155',
|
||||
'upload_date': '20180318',
|
||||
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
||||
'upload_date': '20180604',
|
||||
'uploader_id': 'abc4kids',
|
||||
'timestamp': 1521400959,
|
||||
'timestamp': 1528140219,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -129,17 +129,16 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_params = self._parse_json(self._search_regex(
|
||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
video_params = self._download_json(
|
||||
'https://iview.abc.net.au/api/programs/' + video_id, video_id)
|
||||
title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber')
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
sig = hmac.new(
|
||||
'android.content.res.Resources'.encode('utf-8'),
|
||||
b'android.content.res.Resources',
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
@@ -169,18 +168,26 @@ class ABCIViewIE(InfoExtractor):
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
|
||||
is_live = video_params.get('livestream') == '1'
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': unescapeHTML(title),
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
||||
'title': title,
|
||||
'description': video_params.get('description'),
|
||||
'thumbnail': video_params.get('thumbnail'),
|
||||
'duration': int_or_none(video_params.get('eventDuration')),
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
'season_number': int_or_none(self._search_regex(
|
||||
r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not video_id:
|
||||
entries = []
|
||||
for episode in video_data.get('archiveEpisodes', []):
|
||||
episode_url = episode.get('url')
|
||||
episode_url = url_or_none(episode.get('url'))
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = file_element.text
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
|
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
subtitle_href = url_or_none(subtitle.get('href'))
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
media_url = url_or_none(media.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'url': media_url,
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = rendition.get('url')
|
||||
video_url = url_or_none(rendition.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
|
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -68,8 +68,8 @@ class APAIE(InfoExtractor):
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = source.get('file')
|
||||
if not source_url or not isinstance(source_url, compat_str):
|
||||
source_url = url_or_none(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = item.get('file')
|
||||
file_url = url_or_none(item.get('file'))
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -15,6 +14,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
@@ -100,7 +100,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
|
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -131,8 +132,8 @@ class BandcampIE(InfoExtractor):
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
@@ -306,7 +307,7 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
|
@@ -21,7 +21,6 @@ from ..utils import (
|
||||
urljoin,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -334,14 +333,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
|
||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
def _process_media_selector(self, media_selection, programme_id):
|
||||
@@ -784,6 +778,17 @@ class BBCIE(BBCCoUkIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
'info_dict': {
|
||||
'id': 'b0b9z4vz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prom 6: An American in Paris and Turangalila',
|
||||
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -1006,6 +1011,36 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
|
@@ -114,7 +114,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = self._search_regex(
|
||||
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
default=None
|
||||
) or compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
|
@@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video in content:
|
||||
video_url = video.get('url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)_kbps', video_url, 'tbr', default=None))
|
||||
|
@@ -572,7 +572,8 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if ext == 'ism' or container == 'WVM':
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -629,6 +630,14 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
|
@@ -2,10 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -56,8 +56,8 @@ class CamModelsIE(InfoExtractor):
|
||||
for media in encodings:
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
media_url = media.get('location')
|
||||
if not media_url or not isinstance(media_url, compat_str):
|
||||
media_url = url_or_none(media.get('location'))
|
||||
if not media_url:
|
||||
continue
|
||||
|
||||
format_id_list = [format_id]
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title').strip()
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
@@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
@@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
}
|
||||
})
|
||||
|
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
|
||||
media_url = media['media']['url']
|
||||
if isinstance(media_url, list):
|
||||
for format_ in media_url:
|
||||
format_url = format_.get('file')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
|
@@ -19,6 +19,7 @@ from ..compat import (
|
||||
compat_cookies,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_integer_types,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
@@ -51,6 +52,7 @@ from ..utils import (
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
JSON_LD_RE,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
@@ -548,8 +550,26 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
""" Returns the response handle """
|
||||
@staticmethod
|
||||
def __can_accept_status_code(err, expected_status):
|
||||
assert isinstance(err, compat_urllib_error.HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
if isinstance(expected_status, compat_integer_types):
|
||||
return err.code == expected_status
|
||||
elif isinstance(expected_status, (list, tuple)):
|
||||
return err.code in expected_status
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
else:
|
||||
assert False
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the response handle.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
elif note is not False:
|
||||
@@ -578,6 +598,10 @@ class InfoExtractor(object):
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
return False
|
||||
if errnote is None:
|
||||
@@ -590,13 +614,17 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return a tuple (page content as string, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
@@ -685,13 +713,52 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns the data of the page as a string """
|
||||
def _download_webpage(
|
||||
self, url_or_request, video_id, note=None, errnote=None,
|
||||
fatal=True, tries=1, timeout=5, encoding=None, data=None,
|
||||
headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the data of the page as a string.
|
||||
|
||||
Arguments:
|
||||
url_or_request -- plain text URL as a string or
|
||||
a compat_urllib_request.Requestobject
|
||||
video_id -- Video/playlist/item identifier (string)
|
||||
|
||||
Keyword arguments:
|
||||
note -- note printed before downloading (string)
|
||||
errnote -- note printed in case of an error (string)
|
||||
fatal -- flag denoting whether error should be considered fatal,
|
||||
i.e. whether it should cause ExtractionError to be raised,
|
||||
otherwise a warning will be reported and extraction continued
|
||||
tries -- number of tries
|
||||
timeout -- sleep interval between tries
|
||||
encoding -- encoding for a page content decoding, guessed automatically
|
||||
when not explicitly specified
|
||||
data -- POST data (bytes)
|
||||
headers -- HTTP headers (dict)
|
||||
query -- URL query (dict)
|
||||
expected_status -- allows to accept failed HTTP requests (non 2xx
|
||||
status code) by explicitly specifying a set of accepted status
|
||||
codes. Can be any of the following entities:
|
||||
- an integer type specifying an exact failed status code to
|
||||
accept
|
||||
- a list or a tuple of integer types specifying a list of
|
||||
failed status codes to accept
|
||||
- a callable accepting an actual failed status code and
|
||||
returning True if it should be accepted
|
||||
Note that this argument does not affect success status codes (2xx)
|
||||
which are always accepted.
|
||||
"""
|
||||
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
@@ -707,11 +774,17 @@ class InfoExtractor(object):
|
||||
def _download_xml_handle(
|
||||
self, url_or_request, video_id, note='Downloading XML',
|
||||
errnote='Unable to download XML', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
if res is False:
|
||||
return res
|
||||
xml_string, urlh = res
|
||||
@@ -719,15 +792,21 @@ class InfoExtractor(object):
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal), urlh
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
def _download_xml(
|
||||
self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the xml as an xml.etree.ElementTree.Element.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_xml_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||
@@ -745,11 +824,17 @@ class InfoExtractor(object):
|
||||
def _download_json_handle(
|
||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (JSON object, URL handle)"""
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (JSON object, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
if res is False:
|
||||
return res
|
||||
json_string, urlh = res
|
||||
@@ -760,11 +845,18 @@ class InfoExtractor(object):
|
||||
def _download_json(
|
||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return the JSON object as a dict.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_json_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||
@@ -1058,8 +1150,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
@@ -1768,9 +1859,7 @@ class InfoExtractor(object):
|
||||
'height': height,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
elif src_ext == 'f4m':
|
||||
f4m_url = src_url
|
||||
if not f4m_params:
|
||||
f4m_params = {
|
||||
@@ -1780,9 +1869,13 @@ class InfoExtractor(object):
|
||||
f4m_url += '&' if '?' in f4m_url else '?'
|
||||
f4m_url += compat_urllib_parse_urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
continue
|
||||
|
||||
if src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
@@ -1793,7 +1886,6 @@ class InfoExtractor(object):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
continue
|
||||
|
||||
return formats
|
||||
|
||||
@@ -2015,7 +2107,21 @@ class InfoExtractor(object):
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
t = representation_ms_info[template_name]
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First of, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/rg3/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
t += c
|
||||
if c == '$':
|
||||
in_template = not in_template
|
||||
elif c == '%' and not in_template:
|
||||
t += c
|
||||
# Next, $...$ templates are translated to their
|
||||
# %(...) counterparts to be used with % operator
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
@@ -2346,6 +2452,8 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
@@ -4,23 +4,21 @@ from __future__ import unicode_literals, division
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# geo restricted to CA
|
||||
'url': 'https://www.crackle.com/andromeda/2502343',
|
||||
'info_dict': {
|
||||
@@ -45,7 +43,10 @@ class CrackleIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.sonycrackle.com/andromeda/2502343',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -83,8 +84,8 @@ class CrackleIE(InfoExtractor):
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(e.get('Path'))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
@@ -121,8 +122,8 @@ class CrackleIE(InfoExtractor):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file.get('Path'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
@@ -1,12 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_struct_pack
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
error_to_compat_str,
|
||||
@@ -64,7 +68,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader': 'Deadline',
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -141,7 +144,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count_str = self._search_regex(
|
||||
@@ -167,6 +171,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
|
||||
if metadata.get('error', {}).get('type') == 'password_protected':
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password:
|
||||
r = int(metadata['id'][1:], 36)
|
||||
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
|
||||
t = ''.join(random.choice(string.ascii_letters) for i in range(10))
|
||||
n = us64e(compat_struct_pack('I', r))
|
||||
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
|
||||
metadata = self._download_json(
|
||||
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
|
||||
|
||||
self._check_error(metadata)
|
||||
|
||||
formats = []
|
||||
@@ -302,8 +317,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _check_error(self, info):
|
||||
error = info.get('error')
|
||||
if info.get('error') is not None:
|
||||
title = error['title']
|
||||
if error:
|
||||
title = error.get('title') or error['message']
|
||||
# See https://developer.dailymotion.com/api#access-error
|
||||
if error.get('code') == 'DM007':
|
||||
self.raise_geo_restricted(msg=title)
|
||||
|
@@ -5,13 +5,16 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# 4x3
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
@@ -19,37 +22,55 @@ class DctpTvIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 71.24,
|
||||
'timestamp': 1302172322,
|
||||
'upload_date': '20110407',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# 16x9
|
||||
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
version = self._download_json(
|
||||
'%s/version.json' % self._BASE_URL, display_id,
|
||||
'Downloading version JSON')
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'DC.identifier', webpage, 'video id',
|
||||
default=None) or self._search_regex(
|
||||
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
|
||||
restapi_base = '%s/%s/restapi' % (
|
||||
self._BASE_URL, version['version_name'])
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
info = self._download_json(
|
||||
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
|
||||
'Downloading video info JSON')
|
||||
|
||||
media = self._download_json(
|
||||
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
|
||||
display_id, 'Downloading media JSON')
|
||||
|
||||
uuid = media['uuid']
|
||||
title = media['title']
|
||||
ratio = '16x9' if media.get('is_wide') else '4x3'
|
||||
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
||||
|
||||
servers = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
||||
note='Downloading server list', fatal=False)
|
||||
note='Downloading server list JSON', fatal=False)
|
||||
|
||||
if servers:
|
||||
endpoint = next(
|
||||
server['endpoint']
|
||||
for server in servers
|
||||
if isinstance(server.get('endpoint'), compat_str) and
|
||||
if url_or_none(server.get('endpoint')) and
|
||||
'cloudfront' in server['endpoint'])
|
||||
else:
|
||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
||||
@@ -60,27 +81,35 @@ class DctpTvIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': endpoint,
|
||||
'app': app,
|
||||
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
|
||||
'play_path': play_path,
|
||||
'page_url': url,
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
||||
'ext': 'flv',
|
||||
}]
|
||||
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = float_or_none(self._search_regex(
|
||||
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
|
||||
default=None), scale=1000)
|
||||
thumbnails = []
|
||||
images = media.get('images')
|
||||
if isinstance(images, list):
|
||||
for image in images:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'id': uuid,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'title': title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'description': media.get('description') or media.get('teaser'),
|
||||
'timestamp': unified_timestamp(media.get('created')),
|
||||
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
@@ -12,6 +11,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = caption.get('fileUrl')
|
||||
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||
not subtitle_url.startswith('http')):
|
||||
subtitle_url = url_or_none(caption.get('fileUrl'))
|
||||
if not subtitle_url or not subtitle_url.startswith('http'):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
ext = determine_ext(subtitle_url)
|
||||
|
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from .dplay import DPlayIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
@@ -12,8 +12,13 @@ from ..compat import (
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
|
||||
class DiscoveryNetworksDeIE(DPlayIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
|
||||
(?:
|
||||
.*\#(?P<id>\d+)|
|
||||
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
|
||||
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
|
||||
)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||
@@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
alternate_id = mobj.group('alternate_id')
|
||||
if alternate_id:
|
||||
self._initialize_geo_bypass({
|
||||
'countries': ['DE'],
|
||||
})
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (mobj.group('programme'), alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
|
||||
brightcove_id = mobj.group('id')
|
||||
if not brightcove_id:
|
||||
title = mobj.group('title')
|
||||
|
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
@@ -97,6 +98,75 @@ class DPlayIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm):
|
||||
disco_base = 'https://' + disco_host
|
||||
token = self._download_json(
|
||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Authorization': 'Bearer ' + token,
|
||||
}
|
||||
video = self._download_json(
|
||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||
headers=headers, query={
|
||||
'include': 'show'
|
||||
})
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name']
|
||||
formats = []
|
||||
for format_id, format_dict in self._download_json(
|
||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||
display_id, headers=headers)['data']['attributes']['streaming'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id='dash', fatal=False))
|
||||
elif format_id == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = None
|
||||
try:
|
||||
included = video.get('included')
|
||||
if isinstance(included, list):
|
||||
show = next(e for e in included if e.get('type') == 'show')
|
||||
series = try_get(
|
||||
show, lambda x: x['attributes']['name'], compat_str)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': float_or_none(
|
||||
info.get('videoDuration'), scale=1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(info.get('seasonNumber')),
|
||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
@@ -113,72 +183,8 @@ class DPlayIE(InfoExtractor):
|
||||
|
||||
if not video_id:
|
||||
host = mobj.group('host')
|
||||
disco_base = 'https://disco-api.%s' % host
|
||||
self._download_json(
|
||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': host.replace('.', ''),
|
||||
})
|
||||
video = self._download_json(
|
||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
|
||||
}, query={
|
||||
'include': 'show'
|
||||
})
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name']
|
||||
formats = []
|
||||
for format_id, format_dict in self._download_json(
|
||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||
display_id)['data']['attributes']['streaming'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id='dash', fatal=False))
|
||||
elif format_id == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = None
|
||||
try:
|
||||
included = video.get('included')
|
||||
if isinstance(included, list):
|
||||
show = next(e for e in included if e.get('type') == 'show')
|
||||
series = try_get(
|
||||
show, lambda x: x['attributes']['name'], compat_str)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': float_or_none(
|
||||
info.get('videoDuration'), scale=1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(info.get('seasonNumber')),
|
||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'disco-api.' + host, host.replace('.', ''))
|
||||
|
||||
info = self._download_json(
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
@@ -305,9 +311,11 @@ class DPlayItIE(InfoExtractor):
|
||||
|
||||
if not info:
|
||||
info_url = self._search_regex(
|
||||
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||
webpage, 'info url')
|
||||
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
|
||||
webpage, 'info url', group='url')
|
||||
|
||||
info_url = urljoin(url, info_url)
|
||||
video_id = info_url.rpartition('/')[-1]
|
||||
|
||||
try:
|
||||
@@ -317,6 +325,8 @@ class DPlayItIE(InfoExtractor):
|
||||
'dplayit_token').value,
|
||||
'Referer': url,
|
||||
})
|
||||
if isinstance(info, compat_str):
|
||||
info = self._parse_json(info, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
@@ -332,6 +342,7 @@ class DPlayItIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
|
||||
|
@@ -7,7 +7,6 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
sub_url = url_or_none(sub.get('url'))
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@@ -4,14 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_url = lesson.get('http_url')
|
||||
if not lesson_url or not isinstance(lesson_url, compat_str):
|
||||
lesson_url = url_or_none(lesson.get('http_url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = lesson.get('id')
|
||||
if lesson_id:
|
||||
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for _, format_url in lesson['media_urls'].items():
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
src = url_or_none(format_dict.get('src'))
|
||||
if not src or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
|
77
youtube_dl/extractor/expressen.py
Normal file
77
youtube_dl/extractor/expressen.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
|
||||
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
|
||||
'info_dict': {
|
||||
'id': '8690962',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
|
||||
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 788,
|
||||
'timestamp': 1526639109,
|
||||
'upload_date': '20180518',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def extract_data(name):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
||||
webpage, 'info', group='value'),
|
||||
display_id, transform_source=unescapeHTML)
|
||||
|
||||
info = extract_data('video-tracking-info')
|
||||
video_id = info['videoId']
|
||||
|
||||
data = extract_data('article-data')
|
||||
stream = data['stream']
|
||||
|
||||
if determine_ext(stream) == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
else:
|
||||
formats = [{
|
||||
'url': stream,
|
||||
}]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info.get('titleRaw') or data['title']
|
||||
description = info.get('descriptionRaw')
|
||||
thumbnail = info.get('socialMediaImage') or data.get('image')
|
||||
duration = int_or_none(info.get('videoTotalSecondsDuration') or
|
||||
data.get('totalSecondsDuration'))
|
||||
timestamp = unified_timestamp(info.get('publishDate'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
}
|
@@ -335,6 +335,7 @@ from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import (
|
||||
@@ -372,7 +373,6 @@ from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
@@ -390,6 +390,11 @@ from .francetv import (
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .funimation import FunimationIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
@@ -589,6 +594,10 @@ from .mangomolo import (
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
)
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
@@ -759,7 +768,9 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@@ -1036,6 +1047,7 @@ from .stretchinternet import StretchInternetIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPageIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
@@ -1139,6 +1151,7 @@ from .tvc import (
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvn24 import TVN24IE
|
||||
from .tvnet import TVNetIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
parse_count,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
@@ -75,7 +76,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'title': 're:^Asif Nawab Butt posted a video',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@@ -133,7 +134,7 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -142,6 +143,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@@ -149,7 +151,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||
'title': 'md5:1db063d6a8c13faa8da727817339c857',
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
@@ -176,7 +178,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'title': 'md5:19a428bbde91364e3de815383b54a235',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
@@ -426,6 +428,10 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@@ -433,6 +439,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(f.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
|
@@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
@@ -68,21 +76,41 @@ class FoxNewsIE(AMPIE):
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:article'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# data-video-id
|
||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
|
||||
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
|
||||
'info_dict': {
|
||||
'id': '5116295019001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||
'description': 'Veterans react on \'The Kelly File\'',
|
||||
'timestamp': 1473299755,
|
||||
'timestamp': 1473301045,
|
||||
'upload_date': '20160908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||
'info_dict': {
|
||||
'id': '5748266721001',
|
||||
'ext': 'flv',
|
||||
'title': 'Kyle Kashuv has a positive message for the Trump White House',
|
||||
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 229,
|
||||
'timestamp': 1520594670,
|
||||
'upload_date': '20180309',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -90,51 +118,10 @@ class FoxNewsArticleIE(InfoExtractor):
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
||||
webpage, 'video ID', group='id')
|
||||
webpage, 'video ID', group='id', default=None)
|
||||
if video_id:
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
|
||||
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id,
|
||||
FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:insider'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'md5': 'a10c755e582d28120c62749b4feb4c0c',
|
||||
'info_dict': {
|
||||
'id': '5099377331001',
|
||||
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'ext': 'mp4',
|
||||
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
|
||||
'description': 'Is campus censorship getting out of control?',
|
||||
'timestamp': 1472168725,
|
||||
'upload_date': '20160825',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FoxNewsIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FoxNewsIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
def sign(manifest_url, manifest_id):
|
||||
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
|
||||
signed_url = self._download_webpage(
|
||||
signed_url = url_or_none(self._download_webpage(
|
||||
'https://%s/esi/TA' % host, video_id,
|
||||
'Downloading signed %s manifest URL' % manifest_id,
|
||||
fatal=False, query={
|
||||
'url': manifest_url,
|
||||
})
|
||||
if (signed_url and isinstance(signed_url, compat_str) and
|
||||
re.search(r'^(?:https?:)?//', signed_url)):
|
||||
}))
|
||||
if signed_url:
|
||||
return signed_url
|
||||
return manifest_url
|
||||
|
||||
|
263
youtube_dl/extractor/frontendmasters.py
Normal file
263
youtube_dl/extractor/frontendmasters.py
Normal file
@@ -0,0 +1,263 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class FrontendMastersBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
|
||||
_LOGIN_URL = 'https://frontendmasters.com/login/'
|
||||
|
||||
_NETRC_MACHINE = 'frontendmasters'
|
||||
|
||||
_QUALITIES = {
|
||||
'low': {'width': 480, 'height': 360},
|
||||
'mid': {'width': 1280, 'height': 720},
|
||||
'high': {'width': 1920, 'height': 1080}
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None, 'Downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password
|
||||
})
|
||||
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
|
||||
'post_url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
# Successful login
|
||||
if any(p in response for p in (
|
||||
'wp-login.php?action=logout', '>Logout')):
|
||||
return
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
|
||||
response, 'error message', default=None, group='error')
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
|
||||
def _download_course(self, course_name, url):
|
||||
return self._download_json(
|
||||
'%s/courses/%s' % (self._API_BASE, course_name), course_name,
|
||||
'Downloading course JSON', headers={'Referer': url})
|
||||
|
||||
@staticmethod
|
||||
def _extract_chapters(course):
|
||||
chapters = []
|
||||
lesson_elements = course.get('lessonElements')
|
||||
if isinstance(lesson_elements, list):
|
||||
chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
|
||||
return chapters
|
||||
|
||||
@staticmethod
|
||||
def _extract_lesson(chapters, lesson_id, lesson):
|
||||
title = lesson.get('title') or lesson_id
|
||||
display_id = lesson.get('slug')
|
||||
description = lesson.get('description')
|
||||
thumbnail = lesson.get('thumbnail')
|
||||
|
||||
chapter_number = None
|
||||
index = lesson.get('index')
|
||||
element_index = lesson.get('elementIndex')
|
||||
if (isinstance(index, int) and isinstance(element_index, int) and
|
||||
index < element_index):
|
||||
chapter_number = element_index - index
|
||||
chapter = (chapters[chapter_number - 1]
|
||||
if chapter_number - 1 < len(chapters) else None)
|
||||
|
||||
duration = None
|
||||
timestamp = lesson.get('timestamp')
|
||||
if isinstance(timestamp, compat_str):
|
||||
mobj = re.search(
|
||||
r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
|
||||
timestamp)
|
||||
if mobj:
|
||||
duration = parse_duration(mobj.group('end')) - parse_duration(
|
||||
mobj.group('start'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'frontendmasters:%s' % lesson_id,
|
||||
'ie_key': FrontendMastersIE.ie_key(),
|
||||
'id': lesson_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number,
|
||||
}
|
||||
|
||||
|
||||
class FrontendMastersIE(FrontendMastersBaseIE):
|
||||
_VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
|
||||
'md5': '7f161159710d6b7016a4f4af6fcb05e2',
|
||||
'info_dict': {
|
||||
'id': 'a2qogef6ba',
|
||||
'ext': 'mp4',
|
||||
'title': 'a2qogef6ba',
|
||||
},
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}, {
|
||||
'url': 'frontendmasters:a2qogef6ba',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lesson_id = self._match_id(url)
|
||||
|
||||
source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
|
||||
|
||||
formats = []
|
||||
for ext in ('webm', 'mp4'):
|
||||
for quality in ('low', 'mid', 'high'):
|
||||
resolution = self._QUALITIES[quality].copy()
|
||||
format_id = '%s-%s' % (ext, quality)
|
||||
format_url = self._download_json(
|
||||
source_url, lesson_id,
|
||||
'Downloading %s source JSON' % format_id, query={
|
||||
'f': ext,
|
||||
'r': resolution['height'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
}, fatal=False)['url']
|
||||
|
||||
if not format_url:
|
||||
continue
|
||||
|
||||
f = resolution.copy()
|
||||
f.update({
|
||||
'url': format_url,
|
||||
'ext': ext,
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
|
||||
}]
|
||||
}
|
||||
|
||||
return {
|
||||
'id': lesson_id,
|
||||
'title': lesson_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
|
||||
class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://frontendmasters.com/courses/web-development/tools',
|
||||
'info_dict': {
|
||||
'id': 'a2qogef6ba',
|
||||
'display_id': 'tools',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tools',
|
||||
'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'chapter': 'Introduction',
|
||||
'chapter_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_name, lesson_name = mobj.group('course_name', 'lesson_name')
|
||||
|
||||
course = self._download_course(course_name, url)
|
||||
|
||||
lesson_id, lesson = next(
|
||||
(video_id, data)
|
||||
for video_id, data in course['lessonData'].items()
|
||||
if data.get('slug') == lesson_name)
|
||||
|
||||
chapters = self._extract_chapters(course)
|
||||
return self._extract_lesson(chapters, lesson_id, lesson)
|
||||
|
||||
|
||||
class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://frontendmasters.com/courses/web-development/',
|
||||
'info_dict': {
|
||||
'id': 'web-development',
|
||||
'title': 'Introduction to Web Development',
|
||||
'description': 'md5:9317e6e842098bf725d62360e52d49a6',
|
||||
},
|
||||
'playlist_count': 81,
|
||||
'skip': 'Requires FrontendMasters account credentials',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if FrontendMastersLessonIE.suitable(url) else super(
|
||||
FrontendMastersBaseIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_name = self._match_id(url)
|
||||
|
||||
course = self._download_course(course_name, url)
|
||||
|
||||
chapters = self._extract_chapters(course)
|
||||
|
||||
lessons = sorted(
|
||||
course['lessonData'].values(), key=lambda data: data['index'])
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_name = lesson.get('slug')
|
||||
if not lesson_name:
|
||||
continue
|
||||
lesson_id = lesson.get('hash') or lesson.get('statsId')
|
||||
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
|
||||
|
||||
title = course.get('title')
|
||||
description = course.get('description')
|
||||
|
||||
return self.playlist_result(entries, course_name, title, description)
|
@@ -5,6 +5,7 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
@@ -12,6 +13,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
|
||||
}
|
||||
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
|
||||
|
||||
@staticmethod
|
||||
def _make_headers(referer):
|
||||
headers = FunkBaseIE._HEADERS.copy()
|
||||
headers['Referer'] = referer
|
||||
return headers
|
||||
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -48,19 +62,19 @@ class FunkMixIE(FunkBaseIE):
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
mix_id, headers=self._make_headers(url), query={
|
||||
'size': 100,
|
||||
})['result']['lists']
|
||||
})['_embedded']['curatedListList']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas if meta.get('alias') == alias)
|
||||
for meta in metas
|
||||
if try_get(
|
||||
meta, lambda x: x['videoDataDelegate']['alias'],
|
||||
compat_str) == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
@@ -104,25 +118,39 @@ class FunkChannelIE(FunkBaseIE):
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
headers = {
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
|
||||
'Referer': url,
|
||||
}
|
||||
headers = self._make_headers(url)
|
||||
|
||||
video = None
|
||||
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
|
||||
headers=headers, query={
|
||||
'ids': alias,
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON', headers=headers, query={
|
||||
'size': 100,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
if by_channel_alias:
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if video_list:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList',
|
||||
channel_id, 'Downloading byIdList JSON', headers=headers,
|
||||
query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
|
||||
if not video:
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
|
||||
headers=headers, query={
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter',
|
||||
channel_id, 'Downloading filter JSON', headers=headers, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
|
@@ -32,6 +32,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
@@ -111,6 +112,7 @@ from .cloudflarestream import CloudflareStreamIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -1394,17 +1396,6 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# SVT embed
|
||||
{
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'flv',
|
||||
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# Crooks and Liars embed
|
||||
{
|
||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||
@@ -3076,7 +3067,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
|
||||
|
||||
peertube_urls = PeerTubeIE._extract_urls(webpage)
|
||||
peertube_urls = PeerTubeIE._extract_urls(webpage, url)
|
||||
if peertube_urls:
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
@@ -3091,6 +3082,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
apa_urls, video_id, video_title, ie=APAIE.ie_key())
|
||||
|
||||
foxnews_urls = FoxNewsIE._extract_urls(webpage)
|
||||
if foxnews_urls:
|
||||
return self.playlist_from_matches(
|
||||
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
|
||||
|
||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||
webpage)]
|
||||
@@ -3135,8 +3131,8 @@ class GenericIE(InfoExtractor):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(source.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -14,8 +15,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class Go90IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
|
||||
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
|
||||
'info_dict': {
|
||||
@@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
|
||||
'upload_date': '20170411',
|
||||
'age_limit': 14,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.go90.com/embed/261MflWkD3N',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://www.go90.com/api/view/items/' + video_id,
|
||||
video_id, headers={
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=b'{"client":"web","device_type":"pc"}')
|
||||
})
|
||||
video_data = self._download_json(
|
||||
'https://www.go90.com/api/view/items/' + video_id, video_id,
|
||||
headers=headers, data=b'{"client":"web","device_type":"pc"}')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
|
||||
if 'region unavailable' in message:
|
||||
self.raise_geo_restricted(countries=['US'])
|
||||
raise ExtractorError(message, expected=True)
|
||||
raise
|
||||
|
||||
if video_data.get('requires_drm'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
main_video_asset = video_data['main_video_asset']
|
||||
|
@@ -8,6 +8,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -80,8 +81,8 @@ class HiDiveIE(InfoExtractor):
|
||||
bitrates = rendition.get('bitrates')
|
||||
if not isinstance(bitrates, dict):
|
||||
continue
|
||||
m3u8_url = bitrates.get('hls')
|
||||
if not isinstance(m3u8_url, compat_str):
|
||||
m3u8_url = url_or_none(bitrates.get('hls'))
|
||||
if not m3u8_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -93,9 +94,8 @@ class HiDiveIE(InfoExtractor):
|
||||
if not isinstance(cc_file, list) or len(cc_file) < 3:
|
||||
continue
|
||||
cc_lang = cc_file[0]
|
||||
cc_url = cc_file[2]
|
||||
if not isinstance(cc_lang, compat_str) or not isinstance(
|
||||
cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file[2])
|
||||
if not isinstance(cc_lang, compat_str) or not cc_url:
|
||||
continue
|
||||
subtitles.setdefault(cc_lang, []).append({
|
||||
'url': cc_url,
|
||||
|
@@ -3,12 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
qualities,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -61,8 +61,8 @@ class ImdbIE(InfoExtractor):
|
||||
for encoding in video_metadata.get('encodings', []):
|
||||
if not encoding or not isinstance(encoding, dict):
|
||||
continue
|
||||
video_url = encoding.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(encoding.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
|
||||
if ext == 'm3u8':
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class ImgurIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
|
||||
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://i.imgur.com/A61SaA1.gifv',
|
||||
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://imgur.com/r/aww/VQcQPhM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://i.imgur.com/crGpqCV.mp4',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -21,6 +21,21 @@ class IncIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# div with id=kaltura_player_1_kqs38cgm
|
||||
'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
|
||||
'info_dict': {
|
||||
'id': '1_kqs38cgm',
|
||||
'ext': 'mp4',
|
||||
'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
|
||||
'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
|
||||
'timestamp': 1364403232,
|
||||
'upload_date': '20130327',
|
||||
'uploader_id': 'incdigital@inc.com',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
|
||||
'only_matching': True,
|
||||
@@ -31,10 +46,13 @@ class IncIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
|
||||
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
|
||||
'partner id', default='1034971')
|
||||
|
||||
kaltura_id = self._parse_json(self._search_regex(
|
||||
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
|
||||
kaltura_id = self._search_regex(
|
||||
r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
|
||||
default=None, group='id') or self._parse_json(self._search_regex(
|
||||
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
|
||||
display_id)['vid_kaltura_id']
|
||||
|
||||
return self.url_result(
|
||||
|
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
lowercase_escape,
|
||||
std_headers,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
|
@@ -13,15 +13,17 @@ from ..compat import (
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
url_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -129,64 +131,65 @@ class ITVIE(InfoExtractor):
|
||||
|
||||
resp_env = self._download_xml(
|
||||
params['data-playlist-url'], video_id,
|
||||
headers=headers, data=etree.tostring(req_env))
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
elif fault_code not in (
|
||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'episode_title': params.get('data-video-episode'),
|
||||
'series': params.get('data-video-title'),
|
||||
})
|
||||
else:
|
||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||
info.update({
|
||||
'title': title,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
})
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
headers=headers, data=etree.tostring(req_env), fatal=False)
|
||||
if resp_env:
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
elif fault_code not in (
|
||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'episode_title': params.get('data-video-episode'),
|
||||
'series': params.get('data-video-title'),
|
||||
})
|
||||
else:
|
||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||
info.update({
|
||||
'title': title,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
})
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if caption_url.text:
|
||||
extract_subtitle(caption_url.text)
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if caption_url.text:
|
||||
extract_subtitle(caption_url.text)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
||||
hmac = params.get('data-video-hmac')
|
||||
@@ -248,8 +251,8 @@ class ITVIE(InfoExtractor):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = sub.get('Href')
|
||||
if isinstance(href, compat_str):
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if href:
|
||||
extract_subtitle(href)
|
||||
if not info.get('duration'):
|
||||
info['duration'] = parse_duration(video_data.get('Duration'))
|
||||
@@ -261,7 +264,17 @@ class ITVIE(InfoExtractor):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
webpage_info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not webpage_info.get('title'):
|
||||
webpage_info['title'] = self._html_search_regex(
|
||||
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or webpage_info['episode']
|
||||
|
||||
return merge_dicts(info, webpage_info)
|
||||
|
||||
|
||||
class ITVBTCCIE(InfoExtractor):
|
||||
|
@@ -7,6 +7,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_uri = url_or_none(a_format.get('uri'))
|
||||
if not format_uri:
|
||||
continue
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': a_format['uri'],
|
||||
'url': self._proto_relative_url(format_uri, 'https:'),
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
|
@@ -18,7 +18,7 @@ class JojIE(InfoExtractor):
|
||||
joj:|
|
||||
https?://media\.joj\.sk/embed/
|
||||
)
|
||||
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
||||
(?P<id>[^/?#^]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||
@@ -29,16 +29,24 @@ class JojIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 3118,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://media.joj.sk/embed/9i1cxv',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'joj:9i1cxv',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||
webpage)
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -4,16 +4,14 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_decrypt_text
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
|
||||
encrypted = False
|
||||
|
||||
def extract_format(format_url, height=None):
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//')):
|
||||
return
|
||||
if format_url in format_urls:
|
||||
return
|
||||
|
@@ -2,11 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
|
||||
captions = source.get('captionsAvailableLanguages')
|
||||
if isinstance(captions, dict):
|
||||
for lang, subtitle_url in captions.items():
|
||||
if lang != 'none' and isinstance(subtitle_url, compat_str):
|
||||
subtitle_url = url_or_none(subtitle_url)
|
||||
if lang != 'none' and subtitle_url:
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
|
@@ -4,7 +4,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):
|
||||
form_data = self._hidden_inputs(form_html)
|
||||
form_data.update(extra_form_data)
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
action_url, None, note,
|
||||
data=urlencode_postdata(form_data),
|
||||
headers={
|
||||
'Referer': referrer_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
response = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
self._check_error(response, ('email', 'password'))
|
||||
raise
|
||||
response = self._download_json(
|
||||
action_url, None, note,
|
||||
data=urlencode_postdata(form_data),
|
||||
headers={
|
||||
'Referer': referrer_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, expected_status=(418, 500, ))
|
||||
|
||||
self._check_error(response, 'ErrorMessage')
|
||||
self._check_error(response, ('email', 'password', 'ErrorMessage'))
|
||||
|
||||
return response, action_url
|
||||
|
||||
|
125
youtube_dl/extractor/markiza.py
Normal file
125
youtube_dl/extractor/markiza.py
Normal file
@@ -0,0 +1,125 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MarkizaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
|
||||
'md5': 'ada4e9fad038abeed971843aa028c7b0',
|
||||
'info_dict': {
|
||||
'id': '139078',
|
||||
'ext': 'mp4',
|
||||
'title': 'Oteckovia 109',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2760,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
|
||||
'info_dict': {
|
||||
'id': '85430',
|
||||
'title': 'Televízne noviny',
|
||||
},
|
||||
'playlist_count': 23,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/84723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://videoarchiv.markiza.sk/embed/85295',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
|
||||
video_id, query={'id': video_id})
|
||||
|
||||
info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
|
||||
|
||||
if info.get('_type') == 'playlist':
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': try_get(
|
||||
data, lambda x: x['details']['name'], compat_str),
|
||||
})
|
||||
else:
|
||||
info['duration'] = parse_duration(
|
||||
try_get(data, lambda x: x['details']['duration'], compat_str))
|
||||
return info
|
||||
|
||||
|
||||
class MarkizaPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
|
||||
'md5': 'ada4e9fad038abeed971843aa028c7b0',
|
||||
'info_dict': {
|
||||
'id': '139355',
|
||||
'ext': 'mp4',
|
||||
'title': 'Oteckovia 110',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2604,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
# Downloading for some hosts (e.g. dajto, doma) fails with 500
|
||||
# although everything seems to be OK, so considering 500
|
||||
# status code to be expected.
|
||||
url, playlist_id, expected_status=500)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
|
||||
webpage))]
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
@@ -3,59 +3,75 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from .theplatform import ThePlatformBaseIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class MediasetIE(InfoExtractor):
|
||||
class MediasetIE(ThePlatformBaseIE):
|
||||
_TP_TLD = 'eu'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
mediaset:|
|
||||
https?://
|
||||
(?:www\.)?video\.mediaset\.it/
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||
player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9]+)
|
||||
)(?P<id>[0-9A-Z]{16})
|
||||
'''
|
||||
_TESTS = [{
|
||||
# full episode
|
||||
'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
|
||||
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
|
||||
'info_dict': {
|
||||
'id': '661824',
|
||||
'id': 'FAFU000000661824',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quarta puntata',
|
||||
'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1414,
|
||||
'creator': 'mediaset',
|
||||
'duration': 1414.26,
|
||||
'upload_date': '20161107',
|
||||
'series': 'Hello Goodbye',
|
||||
'categories': ['reality'],
|
||||
'timestamp': 1478532900,
|
||||
'uploader': 'Rete 4',
|
||||
'uploader_id': 'R4',
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
|
||||
'md5': '288532f0ad18307705b01e581304cd7b',
|
||||
'info_dict': {
|
||||
'id': 'F309013801000501',
|
||||
'ext': 'mp4',
|
||||
'title': 'Puntata del 25 maggio',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 6565.007,
|
||||
'upload_date': '20180526',
|
||||
'series': 'Matrix',
|
||||
'timestamp': 1527326245,
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe simple
|
||||
'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
|
||||
'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
|
||||
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'mediaset:661824',
|
||||
'url': 'mediaset:FAFU000000665924',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -68,51 +84,54 @@ class MediasetIE(InfoExtractor):
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_list = self._download_json(
|
||||
'http://cdnsel01.mediaset.net/GetCdn.aspx',
|
||||
video_id, 'Downloading video CDN JSON', query={
|
||||
'streamid': video_id,
|
||||
'format': 'json',
|
||||
})['videoList']
|
||||
guid = self._match_id(url)
|
||||
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
|
||||
info = self._extract_theplatform_metadata(tp_path, guid)
|
||||
|
||||
formats = []
|
||||
for format_url in video_list:
|
||||
if '.ism' in format_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
format_url, video_id, ism_id='mss', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': determine_ext(format_url),
|
||||
})
|
||||
subtitles = {}
|
||||
first_e = None
|
||||
for asset_type in ('SD', 'HD'):
|
||||
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||
'mbr': 'true',
|
||||
'formats': f,
|
||||
'assetTypes': asset_type,
|
||||
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
|
||||
except ExtractorError as e:
|
||||
if not first_e:
|
||||
first_e = e
|
||||
break
|
||||
for tp_f in tp_formats:
|
||||
tp_f['quality'] = 1 if asset_type == 'HD' else 0
|
||||
formats.extend(tp_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
|
||||
if first_e and not formats:
|
||||
raise first_e
|
||||
self._sort_formats(formats)
|
||||
|
||||
mediainfo = self._download_json(
|
||||
'http://plr.video.mediaset.it/html/metainfo.sjson',
|
||||
video_id, 'Downloading video info JSON', query={
|
||||
'id': video_id,
|
||||
})['video']
|
||||
fields = []
|
||||
for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
|
||||
fields.extend(templ % repl for repl in repls)
|
||||
feed_data = self._download_json(
|
||||
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
|
||||
guid, fatal=False, query={'fields': ','.join(fields)})
|
||||
if feed_data:
|
||||
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
|
||||
info.update({
|
||||
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
|
||||
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
|
||||
'series': feed_data.get('mediasetprogram$brandTitle'),
|
||||
'uploader': publish_info.get('description'),
|
||||
'uploader_id': publish_info.get('channel'),
|
||||
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
|
||||
})
|
||||
|
||||
title = mediainfo['title']
|
||||
|
||||
creator = try_get(
|
||||
mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
|
||||
category = try_get(
|
||||
mediainfo, lambda x: x['brand-info']['category'], compat_str)
|
||||
categories = [category] if category else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': mediainfo.get('short-description'),
|
||||
'thumbnail': mediainfo.get('thumbnail'),
|
||||
'duration': parse_duration(mediainfo.get('duration')),
|
||||
'creator': creator,
|
||||
'upload_date': unified_strdate(mediainfo.get('production-date')),
|
||||
'webpage_url': mediainfo.get('url'),
|
||||
'series': mediainfo.get('brand-value'),
|
||||
'categories': categories,
|
||||
info.update({
|
||||
'id': guid,
|
||||
'formats': formats,
|
||||
}
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
|
||||
|
||||
stream_formats = []
|
||||
for unum, VideoUrl in enumerate(video_urls):
|
||||
video_url = VideoUrl.get('Location')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(VideoUrl.get('Location'))
|
||||
if not video_url:
|
||||
continue
|
||||
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
||||
|
||||
|
@@ -77,8 +77,11 @@ class MotherlessIE(InfoExtractor):
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
video_url = self._html_search_regex(
|
||||
r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
|
||||
video_url = (self._html_search_regex(
|
||||
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
webpage, 'video URL', default=None, group='url') or
|
||||
'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Views</strong>\s+([^<]+)<',
|
||||
@@ -120,7 +123,7 @@ class MotherlessIE(InfoExtractor):
|
||||
|
||||
|
||||
class MotherlessGroupIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://motherless.com/g/movie_scenes',
|
||||
'info_dict': {
|
||||
|
@@ -29,14 +29,13 @@ class NexxIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# movie
|
||||
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
|
||||
'md5': '828cea195be04e66057b846288295ba1',
|
||||
'md5': '31899fd683de49ad46f4ee67e53e83fe',
|
||||
'info_dict': {
|
||||
'id': '128907',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stiftung Warentest',
|
||||
'alt_title': 'Wie ein Test abläuft',
|
||||
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
|
||||
'release_year': 2013,
|
||||
'creator': 'SPIEGEL TV',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2509,
|
||||
@@ -62,6 +61,7 @@ class NexxIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
# does not work via arc
|
||||
'url': 'nexx:741:1269984',
|
||||
@@ -71,12 +71,26 @@ class NexxIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
|
||||
'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 607,
|
||||
'timestamp': 1518614955,
|
||||
'upload_date': '20180214',
|
||||
},
|
||||
}, {
|
||||
# free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
|
||||
'url': 'nexx:747:1533779',
|
||||
'md5': '6bf6883912b82b7069fb86c2297e9893',
|
||||
'info_dict': {
|
||||
'id': '1533779',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aufregung um ausgebrochene Raubtiere',
|
||||
'alt_title': 'Eifel-Zoo',
|
||||
'description': 'md5:f21375c91c74ad741dcb164c427999d2',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 111,
|
||||
'timestamp': 1527874460,
|
||||
'upload_date': '20180601',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
|
||||
'only_matching': True,
|
||||
@@ -141,6 +155,139 @@ class NexxIE(InfoExtractor):
|
||||
self._handle_error(result)
|
||||
return result['result']
|
||||
|
||||
def _extract_free_formats(self, video, video_id):
|
||||
stream_data = video['streamdata']
|
||||
cdn = stream_data['cdnType']
|
||||
assert cdn == 'free'
|
||||
|
||||
hash = video['general']['hash']
|
||||
|
||||
ps = compat_str(stream_data['originalDomain'])
|
||||
if stream_data['applyFolderHierarchy'] == 1:
|
||||
s = ('%04d' % int(video_id))[::-1]
|
||||
ps += '/%s/%s' % (s[0:2], s[2:4])
|
||||
ps += '/%s/%s_' % (video_id, hash)
|
||||
|
||||
t = 'http://%s' + ps
|
||||
fd = stream_data['azureFileDistribution'].split(',')
|
||||
cdn_provider = stream_data['cdnProvider']
|
||||
|
||||
def p0(p):
|
||||
return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
|
||||
|
||||
formats = []
|
||||
if cdn_provider == 'ak':
|
||||
t += ','
|
||||
for i in fd:
|
||||
p = i.split(':')
|
||||
t += p[1] + p0(int(p[0])) + ','
|
||||
t += '.mp4.csmil/master.%s'
|
||||
elif cdn_provider == 'ce':
|
||||
k = t.split('/')
|
||||
h = k.pop()
|
||||
http_base = t = '/'.join(k)
|
||||
http_base = http_base % stream_data['cdnPathHTTP']
|
||||
t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
|
||||
for i in fd:
|
||||
p = i.split(':')
|
||||
tbr = int(p[0])
|
||||
filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
|
||||
f = {
|
||||
'url': http_base + '/' + filename,
|
||||
'format_id': '%s-http-%d' % (cdn, tbr),
|
||||
'tbr': tbr,
|
||||
}
|
||||
width_height = p[1].split('x')
|
||||
if len(width_height) == 2:
|
||||
f.update({
|
||||
'width': int_or_none(width_height[0]),
|
||||
'height': int_or_none(width_height[1]),
|
||||
})
|
||||
formats.append(f)
|
||||
a = filename + ':%s' % (tbr * 1000)
|
||||
t += a + ','
|
||||
t = t[:-1] + '&audiostream=' + a.split(':')[0]
|
||||
else:
|
||||
assert False
|
||||
|
||||
if cdn_provider == 'ce':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
|
||||
mpd_id='%s-dash' % cdn, fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_azure_formats(self, video, video_id):
|
||||
stream_data = video['streamdata']
|
||||
cdn = stream_data['cdnType']
|
||||
assert cdn == 'azure'
|
||||
|
||||
azure_locator = stream_data['azureLocator']
|
||||
|
||||
def get_cdn_shield_base(shield_type='', static=False):
|
||||
for secure in ('', 's'):
|
||||
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
|
||||
if cdn_shield:
|
||||
return 'http%s://%s' % (secure, cdn_shield)
|
||||
else:
|
||||
if 'fb' in stream_data['azureAccount']:
|
||||
prefix = 'df' if static else 'f'
|
||||
else:
|
||||
prefix = 'd' if static else 'p'
|
||||
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
|
||||
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
|
||||
|
||||
language = video['general'].get('language_raw') or ''
|
||||
|
||||
azure_stream_base = get_cdn_shield_base()
|
||||
is_ml = ',' in language
|
||||
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
|
||||
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
|
||||
|
||||
protection_token = try_get(
|
||||
video, lambda x: x['protectiondata']['token'], compat_str)
|
||||
if protection_token:
|
||||
azure_manifest_url += '?hdnts=%s' % protection_token
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
azure_manifest_url % '(format=m3u8-aapl)',
|
||||
video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='%s-hls' % cdn, fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
azure_manifest_url % '(format=mpd-time-csf)',
|
||||
video_id, mpd_id='%s-dash' % cdn, fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
|
||||
|
||||
azure_progressive_base = get_cdn_shield_base('Prog', True)
|
||||
azure_file_distribution = stream_data.get('azureFileDistribution')
|
||||
if azure_file_distribution:
|
||||
fds = azure_file_distribution.split(',')
|
||||
if fds:
|
||||
for fd in fds:
|
||||
ss = fd.split(':')
|
||||
if len(ss) == 2:
|
||||
tbr = int_or_none(ss[0])
|
||||
if tbr:
|
||||
f = {
|
||||
'url': '%s%s/%s_src_%s_%d.mp4' % (
|
||||
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
|
||||
'format_id': '%s-http-%d' % (cdn, tbr),
|
||||
'tbr': tbr,
|
||||
}
|
||||
width_height = ss[1].split('x')
|
||||
if len(width_height) == 2:
|
||||
f.update({
|
||||
'width': int_or_none(width_height[0]),
|
||||
'height': int_or_none(width_height[1]),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
|
||||
@@ -220,72 +367,15 @@ class NexxIE(InfoExtractor):
|
||||
general = video['general']
|
||||
title = general['title']
|
||||
|
||||
stream_data = video['streamdata']
|
||||
language = general.get('language_raw') or ''
|
||||
cdn = video['streamdata']['cdnType']
|
||||
|
||||
# TODO: reverse more cdns
|
||||
|
||||
cdn = stream_data['cdnType']
|
||||
assert cdn == 'azure'
|
||||
|
||||
azure_locator = stream_data['azureLocator']
|
||||
|
||||
def get_cdn_shield_base(shield_type='', static=False):
|
||||
for secure in ('', 's'):
|
||||
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
|
||||
if cdn_shield:
|
||||
return 'http%s://%s' % (secure, cdn_shield)
|
||||
else:
|
||||
if 'fb' in stream_data['azureAccount']:
|
||||
prefix = 'df' if static else 'f'
|
||||
else:
|
||||
prefix = 'd' if static else 'p'
|
||||
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
|
||||
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
|
||||
|
||||
azure_stream_base = get_cdn_shield_base()
|
||||
is_ml = ',' in language
|
||||
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
|
||||
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
|
||||
|
||||
protection_token = try_get(
|
||||
video, lambda x: x['protectiondata']['token'], compat_str)
|
||||
if protection_token:
|
||||
azure_manifest_url += '?hdnts=%s' % protection_token
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
azure_manifest_url % '(format=m3u8-aapl)',
|
||||
video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='%s-hls' % cdn, fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
azure_manifest_url % '(format=mpd-time-csf)',
|
||||
video_id, mpd_id='%s-dash' % cdn, fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
|
||||
|
||||
azure_progressive_base = get_cdn_shield_base('Prog', True)
|
||||
azure_file_distribution = stream_data.get('azureFileDistribution')
|
||||
if azure_file_distribution:
|
||||
fds = azure_file_distribution.split(',')
|
||||
if fds:
|
||||
for fd in fds:
|
||||
ss = fd.split(':')
|
||||
if len(ss) == 2:
|
||||
tbr = int_or_none(ss[0])
|
||||
if tbr:
|
||||
f = {
|
||||
'url': '%s%s/%s_src_%s_%d.mp4' % (
|
||||
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
|
||||
'format_id': '%s-http-%d' % (cdn, tbr),
|
||||
'tbr': tbr,
|
||||
}
|
||||
width_height = ss[1].split('x')
|
||||
if len(width_height) == 2:
|
||||
f.update({
|
||||
'width': int_or_none(width_height[0]),
|
||||
'height': int_or_none(width_height[1]),
|
||||
})
|
||||
formats.append(f)
|
||||
if cdn == 'azure':
|
||||
formats = self._extract_azure_formats(video, video_id)
|
||||
elif cdn == 'free':
|
||||
formats = self._extract_free_formats(video, video_id)
|
||||
else:
|
||||
# TODO: reverse more cdns
|
||||
assert False
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -36,8 +36,8 @@ class NPOIE(NPOBaseIE):
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}|
|
||||
ntr\.nl/(?:[^/]+/){2,}|
|
||||
npo\.nl/(?:[^/]+/)*|
|
||||
(?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
|
||||
omroepwnl\.nl/video/fragment/[^/]+__|
|
||||
(?:zapp|npo3)\.nl/(?:[^/]+/){2,}
|
||||
)
|
||||
@@ -160,8 +160,20 @@ class NPOIE(NPOBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://npo.nl/KN_1698996',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if any(ie.suitable(url)
|
||||
for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE))
|
||||
else super(NPOIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._get_info(video_id)
|
||||
@@ -270,7 +282,7 @@ class NPOIE(NPOBaseIE):
|
||||
video_url = stream_info.get('url')
|
||||
if not video_url or video_url in urls:
|
||||
continue
|
||||
urls.add(item_url)
|
||||
urls.add(video_url)
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4',
|
||||
@@ -389,7 +401,7 @@ class NPOLiveIE(NPOBaseIE):
|
||||
|
||||
class NPORadioIE(InfoExtractor):
|
||||
IE_NAME = 'npo.nl:radio'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.npo.nl/radio/radio-1',
|
||||
@@ -404,6 +416,10 @@ class NPORadioIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url)
|
||||
|
||||
@staticmethod
|
||||
def _html_get_attribute_regex(attribute):
|
||||
return r'{0}\s*=\s*\'([^\']+)\''.format(attribute)
|
||||
|
@@ -4,24 +4,40 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
JSON_LD_RE,
|
||||
NO_DEFAULT,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
|
||||
_api_host = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://%s/mediaelement/%s' % (self._API_HOST, video_id),
|
||||
video_id, 'Downloading mediaelement JSON')
|
||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||
|
||||
for api_host in api_hosts:
|
||||
data = self._download_json(
|
||||
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||
video_id, 'Downloading mediaelement JSON',
|
||||
fatal=api_host == api_hosts[-1])
|
||||
if not data:
|
||||
continue
|
||||
self._api_host = api_host
|
||||
break
|
||||
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
@@ -191,7 +207,7 @@ class NRKIE(NRKBaseIE):
|
||||
)
|
||||
(?P<id>[^?#&]+)
|
||||
'''
|
||||
_API_HOST = 'v8-psapi.nrk.no'
|
||||
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
@@ -237,8 +253,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
(?:/\d{2}-\d{2}-\d{4})?
|
||||
(?:\#del=(?P<part_id>\d+))?
|
||||
''' % _EPISODE_RE
|
||||
_API_HOST = 'psapi-we.nrk.no'
|
||||
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '4e9ca6629f09e588ed240fb11619922a',
|
||||
@@ -350,6 +365,182 @@ class NRKTVIE(NRKBaseIE):
|
||||
}]
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
|
||||
'info_dict': {
|
||||
'id': 'MSUI14000816AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Backstage 8:30',
|
||||
'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
|
||||
'duration': 1320,
|
||||
'series': 'Backstage',
|
||||
'season_number': 1,
|
||||
'episode_number': 8,
|
||||
'episode': '8:30',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nrk_id = self._parse_json(
|
||||
self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
|
||||
display_id)['@id']
|
||||
|
||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||
return self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
|
||||
|
||||
|
||||
class NRKTVSerieBaseIE(InfoExtractor):
|
||||
def _extract_series(self, webpage, display_id, fatal=True):
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
|
||||
default='{}' if not fatal else NO_DEFAULT),
|
||||
display_id, fatal=False)
|
||||
if not config:
|
||||
return
|
||||
return try_get(config, lambda x: x['series'], dict)
|
||||
|
||||
def _extract_episodes(self, season):
|
||||
entries = []
|
||||
if not isinstance(season, dict):
|
||||
return entries
|
||||
episodes = season.get('episodes')
|
||||
if not isinstance(episodes, list):
|
||||
return entries
|
||||
for episode in episodes:
|
||||
nrk_id = episode.get('prfId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
return entries
|
||||
|
||||
|
||||
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
||||
'info_dict': {
|
||||
'id': '1',
|
||||
'title': 'Sesong 1',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
|
||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
series = self._extract_series(webpage, display_id)
|
||||
|
||||
season = next(
|
||||
s for s in series['seasons']
|
||||
if int(display_id) == s.get('seasonNumber'))
|
||||
|
||||
title = try_get(season, lambda x: x['titles']['title'], compat_str)
|
||||
return self.playlist_result(
|
||||
self._extract_episodes(season), display_id, title)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# new layout
|
||||
'url': 'https://tv.nrk.no/serie/backstage',
|
||||
'info_dict': {
|
||||
'id': 'backstage',
|
||||
'title': 'Backstage',
|
||||
'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
# old layout
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/saving-the-human-race',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
False if any(ie.suitable(url)
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
|
||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
|
||||
# New layout (e.g. https://tv.nrk.no/serie/backstage)
|
||||
series = self._extract_series(webpage, series_id, fatal=False)
|
||||
if series:
|
||||
title = try_get(series, lambda x: x['titles']['title'], compat_str)
|
||||
description = try_get(
|
||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
||||
entries = []
|
||||
for season in series['seasons']:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
# Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKTVDirekteIE(NRKTVIE):
|
||||
IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
|
||||
@@ -429,64 +620,6 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
|
||||
r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'http://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:58afd450974c89e27d5a19212eee7115',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/saving-the-human-race',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if NRKTVIE.suitable(url) else super(NRKTVSeriesIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
|
||||
|
||||
class NRKSkoleIE(InfoExtractor):
|
||||
IE_DESC = 'NRK Skole'
|
||||
_VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
|
||||
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
@@ -360,6 +361,50 @@ class PBSIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/',
|
||||
'info_dict': {
|
||||
'id': '2365936247',
|
||||
'ext': 'mp4',
|
||||
'title': 'Antiques Roadshow - Indianapolis, Hour 2',
|
||||
'description': 'md5:524b32249db55663e7231b6b8d1671a2',
|
||||
'duration': 3180,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/',
|
||||
'info_dict': {
|
||||
'id': '3007193718',
|
||||
'ext': 'mp4',
|
||||
'title': "Victoria - A Soldier's Daughter / The Green-Eyed Monster",
|
||||
'description': 'md5:37efbac85e0c09b009586523ec143652',
|
||||
'duration': 6292,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'https://player.pbs.org/partnerplayer/tOz9tM5ljOXQqIIWke53UA==/',
|
||||
'info_dict': {
|
||||
'id': '3011407934',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stories from the Stage - Road Trip',
|
||||
'duration': 1619,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
},
|
||||
{
|
||||
'url': 'http://player.pbs.org/widget/partnerplayer/2365297708/?start=0&end=0&chapterbar=false&endscreen=false&topbar=true',
|
||||
'only_matching': True,
|
||||
@@ -422,6 +467,8 @@ class PBSIE(InfoExtractor):
|
||||
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//video\.pbs\.org/widget/partnerplayer/(\d+)', # https://www.pbs.org/wgbh/masterpiece/episodes/victoria-s2-e1/
|
||||
]
|
||||
|
||||
media_id = self._search_regex(
|
||||
@@ -456,7 +503,8 @@ class PBSIE(InfoExtractor):
|
||||
if not url:
|
||||
url = self._og_search_url(webpage)
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mobj = re.match(
|
||||
self._VALID_URL, self._proto_relative_url(url.strip()))
|
||||
|
||||
player_id = mobj.group('player_id')
|
||||
if not display_id:
|
||||
@@ -466,13 +514,27 @@ class PBSIE(InfoExtractor):
|
||||
url, display_id, note='Downloading player page',
|
||||
errnote='Could not download player page')
|
||||
video_id = self._search_regex(
|
||||
r'<div\s+id="video_([0-9]+)"', player_page, 'video ID')
|
||||
r'<div\s+id=["\']video_(\d+)', player_page, 'video ID',
|
||||
default=None)
|
||||
if not video_id:
|
||||
video_info = self._extract_video_data(
|
||||
player_page, 'video data', display_id)
|
||||
video_id = compat_str(
|
||||
video_info.get('id') or video_info['contentID'])
|
||||
else:
|
||||
video_id = mobj.group('id')
|
||||
display_id = video_id
|
||||
|
||||
return video_id, display_id, None, description
|
||||
|
||||
def _extract_video_data(self, string, name, video_id, fatal=True):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
[r'(?s)PBS\.videoData\s*=\s*({.+?});\n',
|
||||
r'window\.videoBridge\s*=\s*({.+?});'],
|
||||
string, name, default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id, upload_date, description = self._extract_webpage(url)
|
||||
|
||||
@@ -503,11 +565,8 @@ class PBSIE(InfoExtractor):
|
||||
'http://player.pbs.org/%s/%s' % (page, video_id),
|
||||
display_id, 'Downloading %s page' % page, fatal=False)
|
||||
if player:
|
||||
video_info = self._parse_json(
|
||||
self._search_regex(
|
||||
[r'(?s)PBS\.videoData\s*=\s*({.+?});\n', r'window\.videoBridge\s*=\s*({.+?});'],
|
||||
player, '%s video data' % page, default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
video_info = self._extract_video_data(
|
||||
player, '%s video data' % page, display_id, fatal=False)
|
||||
if video_info:
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
parse_resolution,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@@ -116,12 +117,14 @@ class PeerTubeIE(InfoExtractor):
|
||||
videos\.tcit\.fr|
|
||||
peertube\.cpy\.re
|
||||
)'''
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
%s
|
||||
/(?:videos/(?:watch|embed)|api/v\d/videos)/
|
||||
(?P<id>[^/?\#&]+)
|
||||
''' % _INSTANCES_RE
|
||||
(?:
|
||||
peertube:(?P<host>[^:]+):|
|
||||
https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_INSTANCES_RE, _UUID_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
||||
'md5': '80f24ff364cc9d333529506a263e7feb',
|
||||
@@ -157,21 +160,40 @@ class PeerTubeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
|
||||
% PeerTubeIE._INSTANCES_RE, webpage)]
|
||||
def _extract_peertube_url(webpage, source_url):
|
||||
mobj = re.match(
|
||||
r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
|
||||
% PeerTubeIE._UUID_RE, source_url)
|
||||
if mobj and any(p in webpage for p in (
|
||||
'<title>PeerTube<',
|
||||
'There will be other non JS-based clients to access PeerTube',
|
||||
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
|
||||
return 'peertube:%s:%s' % mobj.group('host', 'id')
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage, source_url):
|
||||
entries = re.findall(
|
||||
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
|
||||
% (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
|
||||
if not entries:
|
||||
peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
|
||||
if peertube_url:
|
||||
entries = [peertube_url]
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host') or mobj.group('host_2')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._download_json(
|
||||
urljoin(url, '/api/v1/videos/%s' % video_id), video_id)
|
||||
'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
|
||||
|
||||
title = video['name']
|
||||
|
||||
@@ -179,8 +201,8 @@ class PeerTubeIE(InfoExtractor):
|
||||
for file_ in video['files']:
|
||||
if not isinstance(file_, dict):
|
||||
continue
|
||||
file_url = file_.get('fileUrl')
|
||||
if not file_url or not isinstance(file_url, compat_str):
|
||||
file_url = url_or_none(file_.get('fileUrl'))
|
||||
if not file_url:
|
||||
continue
|
||||
file_size = int_or_none(file_.get('size'))
|
||||
format_id = try_get(
|
||||
|
@@ -27,6 +27,60 @@ from ..utils import (
|
||||
class PluralsightBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://app.pluralsight.com'
|
||||
|
||||
_GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE
|
||||
_GRAPHQL_HEADERS = {
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
}
|
||||
_GRAPHQL_COURSE_TMPL = '''
|
||||
query BootstrapPlayer {
|
||||
rpc {
|
||||
bootstrapPlayer {
|
||||
profile {
|
||||
firstName
|
||||
lastName
|
||||
email
|
||||
username
|
||||
userHandle
|
||||
authed
|
||||
isAuthed
|
||||
plan
|
||||
}
|
||||
course(courseId: "%s") {
|
||||
name
|
||||
title
|
||||
courseHasCaptions
|
||||
translationLanguages {
|
||||
code
|
||||
name
|
||||
}
|
||||
supportsWideScreenVideoFormats
|
||||
timestamp
|
||||
modules {
|
||||
name
|
||||
title
|
||||
duration
|
||||
formattedDuration
|
||||
author
|
||||
authorized
|
||||
clips {
|
||||
authorized
|
||||
clipId
|
||||
duration
|
||||
formattedDuration
|
||||
id
|
||||
index
|
||||
moduleIndex
|
||||
moduleTitle
|
||||
name
|
||||
title
|
||||
watched
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _download_course(self, course_id, url, display_id):
|
||||
try:
|
||||
return self._download_course_rpc(course_id, url, display_id)
|
||||
@@ -39,20 +93,14 @@ class PluralsightBaseIE(InfoExtractor):
|
||||
|
||||
def _download_course_rpc(self, course_id, url, display_id):
|
||||
response = self._download_json(
|
||||
'%s/player/functions/rpc' % self._API_BASE, display_id,
|
||||
'Downloading course JSON',
|
||||
data=json.dumps({
|
||||
'fn': 'bootstrapPlayer',
|
||||
'payload': {
|
||||
'courseId': course_id,
|
||||
},
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
})
|
||||
self._GRAPHQL_EP, display_id, data=json.dumps({
|
||||
'query': self._GRAPHQL_COURSE_TMPL % course_id,
|
||||
'variables': {}
|
||||
}).encode('utf-8'), headers=self._GRAPHQL_HEADERS)
|
||||
|
||||
course = try_get(response, lambda x: x['payload']['course'], dict)
|
||||
course = try_get(
|
||||
response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'],
|
||||
dict)
|
||||
if course:
|
||||
return course
|
||||
|
||||
@@ -90,6 +138,28 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
GRAPHQL_VIEWCLIP_TMPL = '''
|
||||
query viewClip {
|
||||
viewClip(input: {
|
||||
author: "%(author)s",
|
||||
clipIndex: %(clipIndex)d,
|
||||
courseName: "%(courseName)s",
|
||||
includeCaptions: %(includeCaptions)s,
|
||||
locale: "%(locale)s",
|
||||
mediaType: "%(mediaType)s",
|
||||
moduleName: "%(moduleName)s",
|
||||
quality: "%(quality)s"
|
||||
}) {
|
||||
urls {
|
||||
url
|
||||
cdn
|
||||
rank
|
||||
source
|
||||
},
|
||||
status
|
||||
}
|
||||
}'''
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@@ -277,7 +347,7 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
f = QUALITIES[quality].copy()
|
||||
clip_post = {
|
||||
'author': author,
|
||||
'includeCaptions': False,
|
||||
'includeCaptions': 'false',
|
||||
'clipIndex': int(clip_idx),
|
||||
'courseName': course_name,
|
||||
'locale': 'en',
|
||||
@@ -286,11 +356,23 @@ class PluralsightIE(PluralsightBaseIE):
|
||||
'quality': '%dx%d' % (f['width'], f['height']),
|
||||
}
|
||||
format_id = '%s-%s' % (ext, quality)
|
||||
viewclip = self._download_json(
|
||||
'%s/video/clips/viewclip' % self._API_BASE, display_id,
|
||||
'Downloading %s viewclip JSON' % format_id, fatal=False,
|
||||
data=json.dumps(clip_post).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=utf-8'})
|
||||
|
||||
try:
|
||||
viewclip = self._download_json(
|
||||
self._GRAPHQL_EP, display_id,
|
||||
'Downloading %s viewclip graphql' % format_id,
|
||||
data=json.dumps({
|
||||
'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post,
|
||||
'variables': {}
|
||||
}).encode('utf-8'),
|
||||
headers=self._GRAPHQL_HEADERS)['data']['viewClip']
|
||||
except ExtractorError:
|
||||
# Still works but most likely will go soon
|
||||
viewclip = self._download_json(
|
||||
'%s/video/clips/viewclip' % self._API_BASE, display_id,
|
||||
'Downloading %s viewclip JSON' % format_id, fatal=False,
|
||||
data=json.dumps(clip_post).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=utf-8'})
|
||||
|
||||
# Pluralsight tracks multiple sequential calls to ViewClip API and start
|
||||
# to return 429 HTTP errors after some time (see
|
||||
|
@@ -43,7 +43,8 @@ class PornComIE(InfoExtractor):
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
|
||||
(r'=\s*({.+?})\s*;\s*v1ar\b',
|
||||
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='),
|
||||
webpage, 'config', default='{}'),
|
||||
display_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
@@ -69,7 +70,7 @@ class PornComIE(InfoExtractor):
|
||||
'height': int(height),
|
||||
'filesize_approx': parse_filesize(filesize),
|
||||
} for format_url, height, filesize in re.findall(
|
||||
r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
|
||||
r'<a[^>]+href="(/download/[^"]+)">[^<]*?(\d+)p<span[^>]*>(\d+\s*[a-zA-Z]+)<',
|
||||
webpage)]
|
||||
thumbnail = None
|
||||
duration = None
|
||||
|
@@ -4,28 +4,21 @@ from __future__ import unicode_literals
|
||||
import functools
|
||||
import itertools
|
||||
import operator
|
||||
# import os
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
# compat_urllib_parse_unquote,
|
||||
# compat_urllib_parse_unquote_plus,
|
||||
# compat_urllib_parse_urlparse,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
# sanitized_Request,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
)
|
||||
# from ..aes import (
|
||||
# aes_decrypt_text
|
||||
# )
|
||||
|
||||
|
||||
class PornHubIE(InfoExtractor):
|
||||
@@ -62,7 +55,7 @@ class PornHubIE(InfoExtractor):
|
||||
'id': '1331683002',
|
||||
'ext': 'mp4',
|
||||
'title': '重庆婷婷女王足交',
|
||||
'uploader': 'cj397186295',
|
||||
'uploader': 'Unknown',
|
||||
'duration': 1753,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@@ -121,7 +114,7 @@ class PornHubIE(InfoExtractor):
|
||||
self._set_cookie('pornhub.com', 'platform', platform)
|
||||
return self._download_webpage(
|
||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
||||
video_id)
|
||||
video_id, 'Downloading %s webpage' % platform)
|
||||
|
||||
webpage = dl_webpage('pc')
|
||||
|
||||
@@ -134,48 +127,19 @@ class PornHubIE(InfoExtractor):
|
||||
'PornHub said: %s' % error_msg,
|
||||
expected=True, video_id=video_id)
|
||||
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
assignments = self._search_regex(
|
||||
r'(var.+?mediastring.+?)</script>', tv_webpage,
|
||||
'encoded url').split(';')
|
||||
|
||||
js_vars = {}
|
||||
|
||||
def parse_js_value(inp):
|
||||
inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
|
||||
if '+' in inp:
|
||||
inps = inp.split('+')
|
||||
return functools.reduce(
|
||||
operator.concat, map(parse_js_value, inps))
|
||||
inp = inp.strip()
|
||||
if inp in js_vars:
|
||||
return js_vars[inp]
|
||||
return remove_quotes(inp)
|
||||
|
||||
for assn in assignments:
|
||||
assn = assn.strip()
|
||||
if not assn:
|
||||
continue
|
||||
assn = re.sub(r'var\s+', '', assn)
|
||||
vname, value = assn.split('=', 1)
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
|
||||
video_url = js_vars['mediastring']
|
||||
|
||||
title = self._search_regex(
|
||||
r'<h1>([^>]+)</h1>', tv_webpage, 'title', default=None)
|
||||
|
||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||
# on that anymore.
|
||||
title = title or self._html_search_meta(
|
||||
title = self._html_search_meta(
|
||||
'twitter:title', webpage, default=None) or self._search_regex(
|
||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
||||
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
|
||||
webpage, 'title', group='title')
|
||||
|
||||
video_urls = []
|
||||
video_urls_set = set()
|
||||
|
||||
flashvars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
|
||||
@@ -183,8 +147,78 @@ class PornHubIE(InfoExtractor):
|
||||
if flashvars:
|
||||
thumbnail = flashvars.get('image_url')
|
||||
duration = int_or_none(flashvars.get('video_duration'))
|
||||
media_definitions = flashvars.get('mediaDefinitions')
|
||||
if isinstance(media_definitions, list):
|
||||
for definition in media_definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = definition.get('videoUrl')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
continue
|
||||
if video_url in video_urls_set:
|
||||
continue
|
||||
video_urls_set.add(video_url)
|
||||
video_urls.append(
|
||||
(video_url, int_or_none(definition.get('quality'))))
|
||||
else:
|
||||
title, thumbnail, duration = [None] * 3
|
||||
thumbnail, duration = [None] * 2
|
||||
|
||||
if not video_urls:
|
||||
tv_webpage = dl_webpage('tv')
|
||||
|
||||
assignments = self._search_regex(
|
||||
r'(var.+?mediastring.+?)</script>', tv_webpage,
|
||||
'encoded url').split(';')
|
||||
|
||||
js_vars = {}
|
||||
|
||||
def parse_js_value(inp):
|
||||
inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
|
||||
if '+' in inp:
|
||||
inps = inp.split('+')
|
||||
return functools.reduce(
|
||||
operator.concat, map(parse_js_value, inps))
|
||||
inp = inp.strip()
|
||||
if inp in js_vars:
|
||||
return js_vars[inp]
|
||||
return remove_quotes(inp)
|
||||
|
||||
for assn in assignments:
|
||||
assn = assn.strip()
|
||||
if not assn:
|
||||
continue
|
||||
assn = re.sub(r'var\s+', '', assn)
|
||||
vname, value = assn.split('=', 1)
|
||||
js_vars[vname] = parse_js_value(value)
|
||||
|
||||
video_url = js_vars['mediastring']
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage):
|
||||
video_url = mobj.group('url')
|
||||
if video_url not in video_urls_set:
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
formats = []
|
||||
for video_url, height in video_urls:
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
tbr = int(mobj.group('tbr'))
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': '%dp' % height if height else None,
|
||||
'height': height,
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
@@ -210,7 +244,6 @@ class PornHubIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
@@ -219,7 +252,7 @@ class PornHubIE(InfoExtractor):
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'comment_count': comment_count,
|
||||
# 'formats': formats,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
|
@@ -53,7 +53,7 @@ class RBMARadioIE(InfoExtractor):
|
||||
'format_id': compat_str(abr),
|
||||
'abr': abr,
|
||||
'vcodec': 'none',
|
||||
} for abr in (96, 128, 256)]
|
||||
} for abr in (96, 128, 192, 256)]
|
||||
self._check_formats(formats, episode_id)
|
||||
|
||||
description = clean_html(episode.get('longTeaser'))
|
||||
|
@@ -3,12 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -71,8 +71,8 @@ class RedTubeIE(InfoExtractor):
|
||||
video_id, fatal=False)
|
||||
if medias and isinstance(medias, list):
|
||||
for media in medias:
|
||||
format_url = media.get('videoUrl')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('quality')
|
||||
formats.append({
|
||||
|
@@ -6,6 +6,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -37,8 +38,8 @@ class RENTVIE(InfoExtractor):
|
||||
title = config['title']
|
||||
formats = []
|
||||
for video in config['src']:
|
||||
src = video.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(video.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
|
@@ -1,10 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -14,20 +18,19 @@ class RTBFIE(InfoExtractor):
|
||||
(?:
|
||||
video/[^?]+\?.*\bid=|
|
||||
ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
|
||||
auvio/[^/]+\?.*id=
|
||||
auvio/[^/]+\?.*\b(?P<live>l)?id=
|
||||
)(?P<id>\d+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
|
||||
'md5': '799f334ddf2c0a582ba80c44655be570',
|
||||
'md5': '8c876a1cceeb6cf31b476461ade72384',
|
||||
'info_dict': {
|
||||
'id': '1921274',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les Diables au coeur (épisode 2)',
|
||||
'description': 'Football - Diables Rouges',
|
||||
'duration': 3099,
|
||||
'description': '(du 25/04/2014)',
|
||||
'duration': 3099.54,
|
||||
'upload_date': '20140425',
|
||||
'timestamp': 1398456336,
|
||||
'uploader': 'rtbfsport',
|
||||
'timestamp': 1398456300,
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
@@ -39,6 +42,18 @@ class RTBFIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Live
|
||||
'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Audio
|
||||
'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# With Subtitle
|
||||
'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
|
||||
_PROVIDERS = {
|
||||
@@ -53,46 +68,94 @@ class RTBFIE(InfoExtractor):
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id)
|
||||
live, media_id = re.match(self._VALID_URL, url).groups()
|
||||
embed_page = self._download_webpage(
|
||||
'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
|
||||
media_id, query={'id': media_id})
|
||||
data = self._parse_json(self._html_search_regex(
|
||||
r'data-media="([^"]+)"', embed_page, 'media data'), media_id)
|
||||
|
||||
error = data.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
data = data['data']
|
||||
|
||||
provider = data.get('provider')
|
||||
if provider in self._PROVIDERS:
|
||||
return self.url_result(data['url'], self._PROVIDERS[provider])
|
||||
|
||||
title = data['title']
|
||||
is_live = data.get('isLive')
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
height_re = r'-(\d+)p\.'
|
||||
formats = []
|
||||
for key, format_id in self._QUALITIES:
|
||||
format_url = data.get(key + 'Url')
|
||||
if format_url:
|
||||
|
||||
m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
|
||||
fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
|
||||
http_url = data.get('url')
|
||||
if formats and http_url and re.search(height_re, http_url):
|
||||
http_url = fix_url(http_url)
|
||||
for m3u8_f in formats[:]:
|
||||
height = m3u8_f.get('height')
|
||||
if not height:
|
||||
continue
|
||||
f = m3u8_f.copy()
|
||||
del f['protocol']
|
||||
f.update({
|
||||
'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
|
||||
'url': re.sub(height_re, '-%dp.' % height, http_url),
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
sources = data.get('sources') or {}
|
||||
for key, format_id in self._QUALITIES:
|
||||
format_url = sources.get(key)
|
||||
if not format_url:
|
||||
continue
|
||||
height = int_or_none(self._search_regex(
|
||||
height_re, format_url, 'height', default=None))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': format_url,
|
||||
'url': fix_url(format_url),
|
||||
'height': height,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items():
|
||||
if thumbnail_id != 'default':
|
||||
thumbnails.append({
|
||||
'url': self._IMAGE_HOST + thumbnail_url,
|
||||
'id': thumbnail_id,
|
||||
})
|
||||
mpd_url = data.get('urlDash')
|
||||
if not data.get('drm') and mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url, media_id, mpd_id='dash', fatal=False))
|
||||
|
||||
audio_url = data.get('urlAudio')
|
||||
if audio_url:
|
||||
formats.append({
|
||||
'format_id': 'audio',
|
||||
'url': audio_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for track in (data.get('tracks') or {}).values():
|
||||
sub_url = track.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('lang') or 'fr', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': media_id,
|
||||
'formats': formats,
|
||||
'title': data['title'],
|
||||
'description': data.get('description') or data.get('subtitle'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': data.get('duration') or data.get('realDuration'),
|
||||
'timestamp': int_or_none(data.get('created')),
|
||||
'view_count': int_or_none(data.get('viewCount')),
|
||||
'uploader': data.get('channel'),
|
||||
'tags': data.get('tags'),
|
||||
'title': title,
|
||||
'description': strip_or_none(data.get('description')),
|
||||
'thumbnail': data.get('thumbnail'),
|
||||
'duration': float_or_none(data.get('realDuration')),
|
||||
'timestamp': int_or_none(data.get('liveFrom')),
|
||||
'series': data.get('programLabel'),
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -176,8 +177,8 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
|
||||
break
|
||||
|
||||
for result in results:
|
||||
video_url = result.get('video_url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(result.get('video_url'))
|
||||
if not video_url:
|
||||
continue
|
||||
entry = self._extract_video(result, require_title=False)
|
||||
entry.update({
|
||||
|
@@ -19,29 +19,33 @@ from ..utils import (
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
IE_NAME = '6play'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.6play.fr/le-meilleur-patissier-p_1807/le-meilleur-patissier-special-fetes-mercredi-a-21-00-sur-m6-c_11638450',
|
||||
'md5': '42310bffe4ba3982db112b9cd3467328',
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
|
||||
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
|
||||
'info_dict': {
|
||||
'id': '11638450',
|
||||
'id': '12041051',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le Meilleur Pâtissier, spécial fêtes mercredi à 21:00 sur M6',
|
||||
'description': 'md5:308853f6a5f9e2d55a30fc0654de415f',
|
||||
'duration': 39,
|
||||
'series': 'Le meilleur pâtissier',
|
||||
'title': 'Le but qui a marqué l\'histoire du football français !',
|
||||
'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain, video_id = re.search(self._VALID_URL, url).groups()
|
||||
service, consumer_name = {
|
||||
'6play.fr': ('6play', 'm6web'),
|
||||
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
|
||||
}.get(domain, ('6play', 'm6web'))
|
||||
|
||||
data = self._download_json(
|
||||
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/6play/videos/clip_%s' % video_id,
|
||||
video_id, query={
|
||||
'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id),
|
||||
video_id, headers={
|
||||
'x-customer-name': consumer_name
|
||||
}, query={
|
||||
'csa': 5,
|
||||
'with': 'clips',
|
||||
})
|
||||
@@ -65,7 +69,14 @@ class SixPlayIE(InfoExtractor):
|
||||
subtitles.setdefault('fr', []).append({'url': asset_url})
|
||||
continue
|
||||
if container == 'm3u8' or ext == 'm3u8':
|
||||
if protocol == 'usp' and not compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
|
||||
if protocol == 'usp':
|
||||
if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
|
||||
urlh = self._request_webpage(
|
||||
asset_url, video_id, fatal=False,
|
||||
headers=self.geo_verification_headers())
|
||||
if not urlh:
|
||||
continue
|
||||
asset_url = urlh.geturl()
|
||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
|
@@ -1,12 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class SlutloadIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
|
||||
'md5': '868309628ba00fd488cf516a113fd717',
|
||||
@@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
|
||||
'title': 'virginie baisee en cam',
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:https?://.*?\.jpg'
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# mobile site
|
||||
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
|
||||
webpage = self._download_webpage(desktop_url, video_id)
|
||||
embed_page = self._download_webpage(
|
||||
'http://www.slutload.com/embed_player/%s' % video_id, video_id,
|
||||
'Downloading embed page', fatal=False)
|
||||
|
||||
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
|
||||
webpage, 'title').strip()
|
||||
if embed_page:
|
||||
def extract(what):
|
||||
return self._html_search_regex(
|
||||
r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
|
||||
embed_page, 'video %s' % what, default=None, group='url')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
|
||||
webpage, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
video_url = extract('url')
|
||||
if video_url:
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)', embed_page, 'title', default=video_id)
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': extract('preview'),
|
||||
'age_limit': 18
|
||||
}
|
||||
|
||||
return {
|
||||
webpage = self._download_webpage(
|
||||
'http://www.slutload.com/video/_/%s/' % video_id, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18
|
||||
}
|
||||
'title': title,
|
||||
'age_limit': 18,
|
||||
})
|
||||
return info
|
||||
|
@@ -12,6 +12,8 @@ from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
compat_str,
|
||||
@@ -137,7 +139,12 @@ class SVTPlayBaseIE(SVTBaseIE):
|
||||
|
||||
class SVTPlayIE(SVTPlayBaseIE):
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
svt:(?P<svt_id>[^/?#&]+)|
|
||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
||||
@@ -164,10 +171,40 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.svtplay.se/kanaler/svt1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'svt:1376446-003A',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'svt:14278044',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _adjust_title(self, info):
|
||||
if info['is_live']:
|
||||
info['title'] = self._live_title(info['title'])
|
||||
|
||||
def _extract_by_video_id(self, video_id, webpage=None):
|
||||
data = self._download_json(
|
||||
'https://api.svt.se/videoplayer-api/video/%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
if not info_dict.get('title'):
|
||||
title = dict_get(info_dict, ('episode', 'series'))
|
||||
if not title and webpage:
|
||||
title = re.sub(
|
||||
r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
|
||||
if not title:
|
||||
title = video_id
|
||||
info_dict['title'] = title
|
||||
self._adjust_title(info_dict)
|
||||
return info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, svt_id = mobj.group('id', 'svt_id')
|
||||
|
||||
if svt_id:
|
||||
return self._extract_by_video_id(svt_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -179,10 +216,6 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
def adjust_title(info):
|
||||
if info['is_live']:
|
||||
info['title'] = self._live_title(info['title'])
|
||||
|
||||
if data:
|
||||
video_info = try_get(
|
||||
data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
|
||||
@@ -193,24 +226,14 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
adjust_title(info_dict)
|
||||
self._adjust_title(info_dict)
|
||||
return info_dict
|
||||
|
||||
video_id = self._search_regex(
|
||||
svt_id = self._search_regex(
|
||||
r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||
webpage, 'video id', default=None)
|
||||
webpage, 'video id')
|
||||
|
||||
if video_id:
|
||||
data = self._download_json(
|
||||
'https://api.svt.se/videoplayer-api/video/%s' % video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
if not info_dict.get('title'):
|
||||
info_dict['title'] = re.sub(
|
||||
r'\s*\|\s*.+?$', '',
|
||||
info_dict.get('episode') or self._og_search_title(webpage))
|
||||
adjust_title(info_dict)
|
||||
return info_dict
|
||||
return self._extract_by_video_id(svt_id, webpage)
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
|
||||
@@ -292,3 +315,57 @@ class SVTSeriesIE(SVTPlayBaseIE):
|
||||
|
||||
return self.playlist_result(
|
||||
entries, series_id, title, metadata.get('description'))
|
||||
|
||||
|
||||
class SVTPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
|
||||
'info_dict': {
|
||||
'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
|
||||
'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
|
||||
'info_dict': {
|
||||
'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
|
||||
'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
# only programTitle
|
||||
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stjärnorna skojar till det - under SVT-intervjun',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'data-video-id=["\'](\d+)', webpage))]
|
||||
|
||||
title = strip_or_none(self._og_search_title(webpage, default=None))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
@@ -19,6 +19,7 @@ class TF1IE(InfoExtractor):
|
||||
# Sometimes wat serves the whole file with the --test option
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.tfou.fr/chuggington/videos/le-grand-mysterioso-chuggington-7085291-739.html',
|
||||
'info_dict': {
|
||||
|
@@ -32,13 +32,15 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||
|
||||
|
||||
class ThePlatformBaseIE(OnceIE):
|
||||
_TP_TLD = 'com'
|
||||
|
||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||
meta = self._download_xml(
|
||||
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
||||
headers=self.geo_verification_headers())
|
||||
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
||||
if error_element is not None and error_element.attrib['src'].startswith(
|
||||
'http://link.theplatform.com/s/errorFiles/Unavailable.'):
|
||||
'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
|
||||
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
||||
|
||||
smil_formats = self._parse_smil_formats(
|
||||
@@ -66,7 +68,7 @@ class ThePlatformBaseIE(OnceIE):
|
||||
return formats, subtitles
|
||||
|
||||
def _download_theplatform_metadata(self, path, video_id):
|
||||
info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
|
||||
info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
|
||||
return self._download_json(info_url, video_id)
|
||||
|
||||
def _parse_theplatform_metadata(self, info):
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -154,8 +155,8 @@ class TurnerBaseIE(AdobePassIE):
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = track.get('url')
|
||||
if not isinstance(track_url, compat_str) or track_url.endswith('/big'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
if not track_url or track_url.endswith('/big'):
|
||||
continue
|
||||
lang = track.get('lang') or track.get('label') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
|
@@ -1,13 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -78,42 +77,25 @@ class TV4IE(InfoExtractor):
|
||||
|
||||
title = info['title']
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
# http formats are linked with unresolvable host
|
||||
for kind in ('hls3', ''):
|
||||
data = self._download_json(
|
||||
'https://prima.tv4play.se/api/web/asset/%s/play.json' % video_id,
|
||||
video_id, 'Downloading sources JSON', query={
|
||||
'protocol': kind,
|
||||
'videoFormat': 'MP4+WEBVTT',
|
||||
})
|
||||
items = try_get(data, lambda x: x['playback']['items']['item'])
|
||||
if not items:
|
||||
continue
|
||||
if isinstance(items, dict):
|
||||
items = [items]
|
||||
for item in items:
|
||||
manifest_url = item.get('url')
|
||||
if not isinstance(manifest_url, compat_str):
|
||||
continue
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=kind, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
manifest_url, video_id, {
|
||||
'hls': 'tv4play-i.akamaihd.net',
|
||||
}))
|
||||
elif ext == 'webvtt':
|
||||
subtitles = self._merge_subtitles(
|
||||
subtitles, {
|
||||
'sv': [{
|
||||
'url': manifest_url,
|
||||
'ext': 'vtt',
|
||||
}]})
|
||||
manifest_url = self._download_json(
|
||||
'https://playback-api.b17g.net/media/' + video_id,
|
||||
video_id, query={
|
||||
'service': 'tv4',
|
||||
'device': 'browser',
|
||||
'protocol': 'hls',
|
||||
})['playbackItem']['manifestUrl']
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
|
||||
if not formats and info.get('is_geo_restricted'):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
@@ -124,7 +106,7 @@ class TV4IE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
# 'subtitles': subtitles,
|
||||
'description': info.get('description'),
|
||||
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
|
147
youtube_dl/extractor/tvnet.py
Normal file
147
youtube_dl/extractor/tvnet.py
Normal file
@@ -0,0 +1,147 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class TVNetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+)\.tvnet\.gov\.vn/[^/]+/(?:\d+/)?(?P<id>\d+)(?:/|$)'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://de.tvnet.gov.vn/video/109788/vtv1---bac-tuyet-tai-lao-cai-va-ha-giang/tin-nong-24h',
|
||||
'md5': 'b4d7abe0252c9b47774760b7519c7558',
|
||||
'info_dict': {
|
||||
'id': '109788',
|
||||
'ext': 'mp4',
|
||||
'title': 'VTV1 - Bắc tuyết tại Lào Cai và Hà Giang',
|
||||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
|
||||
'is_live': False,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://vn.tvnet.gov.vn/radio/27017/vov1---ban-tin-chieu-10062018/doi-song-va-xa-hoi',
|
||||
'md5': 'b5875ce9b0a2eecde029216d0e6db2ae',
|
||||
'info_dict': {
|
||||
'id': '27017',
|
||||
'ext': 'm4a',
|
||||
'title': 'VOV1 - Bản tin chiều (10/06/2018)',
|
||||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
|
||||
'is_live': False,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://us.tvnet.gov.vn/video/118023/129999/ngay-0705',
|
||||
'info_dict': {
|
||||
'id': '129999',
|
||||
'ext': 'mp4',
|
||||
'title': 'VTV1 - Quốc hội với cử tri (11/06/2018)',
|
||||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
|
||||
'is_live': False,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# live stream
|
||||
'url': 'http://us.tvnet.gov.vn/kenh-truyen-hinh/1011/vtv1',
|
||||
'info_dict': {
|
||||
'id': '1011',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^VTV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# radio live stream
|
||||
'url': 'http://vn.tvnet.gov.vn/kenh-truyen-hinh/1014',
|
||||
'info_dict': {
|
||||
'id': '1014',
|
||||
'ext': 'm4a',
|
||||
'title': r're:VOV1 \| LiveTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'thumbnail': r're:(?i)https?://.*\.(?:jpg|png)',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://us.tvnet.gov.vn/phim/6136/25510/vtv3---ca-mot-doi-an-oan-tap-1-50/phim-truyen-hinh',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'title', webpage, default=None) or self._search_regex(
|
||||
r'<title>([^<]+)<', webpage, 'title')
|
||||
title = re.sub(r'\s*-\s*TV Net\s*$', '', title)
|
||||
|
||||
if '/video/' in url or '/radio/' in url:
|
||||
is_live = False
|
||||
elif '/kenh-truyen-hinh/' in url:
|
||||
is_live = True
|
||||
else:
|
||||
is_live = None
|
||||
|
||||
data_file = unescapeHTML(self._search_regex(
|
||||
r'data-file=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
|
||||
'data file', group='url'))
|
||||
|
||||
stream_urls = set()
|
||||
formats = []
|
||||
for stream in self._download_json(data_file, video_id):
|
||||
if not isinstance(stream, dict):
|
||||
continue
|
||||
stream_url = url_or_none(stream.get('url'))
|
||||
if stream_url in stream_urls or not stream_url:
|
||||
continue
|
||||
stream_urls.add(stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
# better support for radio streams
|
||||
if title.startswith('VOV'):
|
||||
for f in formats:
|
||||
f.update({
|
||||
'ext': 'm4a',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or unescapeHTML(
|
||||
self._search_regex(
|
||||
r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage,
|
||||
'thumbnail', default=None, group='url'))
|
||||
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>',
|
||||
webpage, 'view count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'is_live': is_live,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -19,8 +19,8 @@ class TVNowBaseIE(InfoExtractor):
|
||||
_VIDEO_FIELDS = (
|
||||
'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
|
||||
'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
|
||||
'manifest.dashclear', 'format.title', 'format.defaultImage169Format',
|
||||
'format.defaultImage169Logo')
|
||||
'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
|
||||
'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
|
||||
|
||||
def _call_api(self, path, video_id, query):
|
||||
return self._download_json(
|
||||
@@ -31,27 +31,42 @@ class TVNowBaseIE(InfoExtractor):
|
||||
video_id = compat_str(info['id'])
|
||||
title = info['title']
|
||||
|
||||
mpd_url = info['manifest']['dashclear']
|
||||
if not mpd_url:
|
||||
paths = []
|
||||
for manifest_url in (info.get('manifest') or {}).values():
|
||||
if not manifest_url:
|
||||
continue
|
||||
manifest_url = update_url_query(manifest_url, {'filter': ''})
|
||||
path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
|
||||
if path in paths:
|
||||
continue
|
||||
paths.append(path)
|
||||
|
||||
def url_repl(proto, suffix):
|
||||
return re.sub(
|
||||
r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
|
||||
r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
|
||||
'.ism/' + suffix, manifest_url))
|
||||
|
||||
formats = self._extract_mpd_formats(
|
||||
url_repl('dash', '.mpd'), video_id,
|
||||
mpd_id='dash', fatal=False)
|
||||
formats.extend(self._extract_ism_formats(
|
||||
url_repl('hss', 'Manifest'),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url_repl('hls', '.m3u8'), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
if formats:
|
||||
break
|
||||
else:
|
||||
if info.get('isDrm'):
|
||||
raise ExtractorError(
|
||||
'Video %s is DRM protected' % video_id, expected=True)
|
||||
if info.get('geoblocked'):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available from your location due to geo restriction' % video_id,
|
||||
expected=True)
|
||||
raise self.raise_geo_restricted()
|
||||
if not info.get('free', True):
|
||||
raise ExtractorError(
|
||||
'Video %s is not available for free' % video_id, expected=True)
|
||||
|
||||
mpd_url = update_url_query(mpd_url, {'filter': ''})
|
||||
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(self._extract_ism_formats(
|
||||
mpd_url.replace('dash.', 'hss.').replace('/.mpd', '/Manifest'),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
mpd_url.replace('dash.', 'hls.').replace('/.mpd', '/.m3u8'),
|
||||
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = info.get('articleLong') or info.get('articleShort')
|
||||
@@ -88,7 +103,7 @@ class TVNowBaseIE(InfoExtractor):
|
||||
class TVNowIE(TVNowBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
|
||||
(?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
|
||||
(?P<show_id>[^/]+)/
|
||||
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
@@ -140,11 +155,13 @@ class TVNowIE(TVNowBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = '%s/%s' % re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = '%s/%s' % mobj.group(2, 3)
|
||||
|
||||
info = self._call_api(
|
||||
'movies/' + display_id, display_id, query={
|
||||
'fields': ','.join(self._VIDEO_FIELDS),
|
||||
'station': mobj.group(1),
|
||||
})
|
||||
|
||||
return self._extract_video(info, display_id)
|
||||
|
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -255,7 +256,8 @@ class TVPlayIE(InfoExtractor):
|
||||
quality = qualities(['hls', 'medium', 'high'])
|
||||
formats = []
|
||||
for format_id, video_url in streams.get('streams', {}).items():
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video_url)
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
|
@@ -27,6 +27,7 @@ from ..utils import (
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
@@ -663,8 +664,8 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
for option in status['quality_options']:
|
||||
if not isinstance(option, dict):
|
||||
continue
|
||||
source = option.get('source')
|
||||
if not source or not isinstance(source, compat_str):
|
||||
source = url_or_none(option.get('source'))
|
||||
if not source:
|
||||
continue
|
||||
formats.append({
|
||||
'url': source,
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@@ -265,8 +266,8 @@ class UdemyIE(InfoExtractor):
|
||||
if not isinstance(source_list, list):
|
||||
return
|
||||
for source in source_list:
|
||||
video_url = source.get('file') or source.get('src')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(source.get('file') or source.get('src'))
|
||||
if not video_url:
|
||||
continue
|
||||
if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -293,8 +294,8 @@ class UdemyIE(InfoExtractor):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
src = track.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(track.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
lang = track.get('language') or track.get(
|
||||
'srclang') or track.get('label')
|
||||
@@ -314,8 +315,8 @@ class UdemyIE(InfoExtractor):
|
||||
for cc in captions:
|
||||
if not isinstance(cc, dict):
|
||||
continue
|
||||
cc_url = cc.get('url')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc.get('url'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
|
||||
sub_dict = (automatic_captions if cc.get('source') == 'auto'
|
||||
|
@@ -24,6 +24,7 @@ class VGTVIE(XstreamIE):
|
||||
'aftenposten.no/webtv': 'aptv',
|
||||
'ap.vgtv.no/webtv': 'aptv',
|
||||
'tv.aftonbladet.se/abtv': 'abtv',
|
||||
'www.aftonbladet.se/tv': 'abtv',
|
||||
}
|
||||
|
||||
_APP_NAME_TO_VENDOR = {
|
||||
@@ -44,7 +45,7 @@ class VGTVIE(XstreamIE):
|
||||
(?:
|
||||
(?:\#!/)?(?:video|live)/|
|
||||
embed?.*id=|
|
||||
articles/
|
||||
a(?:rticles)?/
|
||||
)|
|
||||
(?P<appname>
|
||||
%s
|
||||
@@ -143,6 +144,10 @@ class VGTVIE(XstreamIE):
|
||||
'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.aftonbladet.se/tv/a/36015',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'abtv:140026',
|
||||
'only_matching': True,
|
||||
@@ -178,13 +183,15 @@ class VGTVIE(XstreamIE):
|
||||
|
||||
streams = data['streamUrls']
|
||||
stream_type = data.get('streamType')
|
||||
|
||||
is_live = stream_type == 'live'
|
||||
formats = []
|
||||
|
||||
hls_url = streams.get('hls')
|
||||
if hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
hls_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
hds_url = streams.get('hds')
|
||||
if hds_url:
|
||||
@@ -229,13 +236,13 @@ class VGTVIE(XstreamIE):
|
||||
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': self._live_title(data['title']) if stream_type == 'live' else data['title'],
|
||||
'title': self._live_title(data['title']) if is_live else data['title'],
|
||||
'description': data['description'],
|
||||
'thumbnail': data['images']['main'] + '?t[]=900x506q80',
|
||||
'timestamp': data['published'],
|
||||
'duration': float_or_none(data['duration'], 1000),
|
||||
'view_count': data['displays'],
|
||||
'is_live': True if stream_type == 'live' else False,
|
||||
'is_live': is_live,
|
||||
})
|
||||
return info
|
||||
|
||||
|
@@ -3,15 +3,13 @@ from __future__ import unicode_literals
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -166,8 +164,8 @@ class VidmeIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for f in video.get('formats', []):
|
||||
format_url = f.get('uri')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(f.get('uri'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_type = f.get('type')
|
||||
if format_type == 'dash':
|
||||
|
@@ -54,7 +54,8 @@ class VidziIE(InfoExtractor):
|
||||
self._search_regex(
|
||||
r'setup\(([^)]+)\)', code, 'jwplayer data',
|
||||
default=NO_DEFAULT if num == len(codes) else '{}'),
|
||||
video_id, transform_source=js_to_json)
|
||||
video_id, transform_source=lambda s: js_to_json(
|
||||
re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
|
||||
if jwplayer_data:
|
||||
break
|
||||
|
||||
|
@@ -539,9 +539,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# We try to find out to which variable is assigned the config dic
|
||||
m_variable_name = re.search(r'(\w)\.video\.id', webpage)
|
||||
if m_variable_name is not None:
|
||||
config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
|
||||
config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
|
||||
else:
|
||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
|
||||
config = self._search_regex(config_re, webpage, 'info section',
|
||||
flags=re.DOTALL)
|
||||
config = json.loads(config)
|
||||
|
@@ -195,16 +195,29 @@ class ViuOTTIE(InfoExtractor):
|
||||
'skip': 'Geo-restricted to Hong Kong',
|
||||
}]
|
||||
|
||||
_AREA_ID = {
|
||||
'HK': 1,
|
||||
'SG': 2,
|
||||
'TH': 4,
|
||||
'PH': 5,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
country_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
query = {
|
||||
'r': 'vod/ajax-detail',
|
||||
'platform_flag_label': 'web',
|
||||
'product_id': video_id,
|
||||
}
|
||||
|
||||
area_id = self._AREA_ID.get(country_code.upper())
|
||||
if area_id:
|
||||
query['area_id'] = area_id
|
||||
|
||||
product_data = self._download_json(
|
||||
'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
|
||||
'Downloading video info', query={
|
||||
'r': 'vod/ajax-detail',
|
||||
'platform_flag_label': 'web',
|
||||
'product_id': video_id,
|
||||
})['data']
|
||||
'Downloading video info', query=query)['data']
|
||||
|
||||
video_data = product_data.get('current_product')
|
||||
if not video_data:
|
||||
@@ -214,6 +227,9 @@ class ViuOTTIE(InfoExtractor):
|
||||
'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
|
||||
video_id, 'Downloading stream info', query={
|
||||
'ccs_product_id': video_data['ccs_product_id'],
|
||||
}, headers={
|
||||
'Referer': url,
|
||||
'Origin': re.search(r'https?://[^/]+', url).group(0),
|
||||
})['data']['stream']
|
||||
|
||||
stream_sizes = stream_data.get('size', {})
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
@@ -423,7 +424,8 @@ class VKIE(VKBaseIE):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data.items():
|
||||
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
|
||||
continue
|
||||
if (format_id.startswith(('url', 'cache')) or
|
||||
format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
|
||||
|
@@ -57,7 +57,7 @@ class VLiveIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
'https://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
|
||||
VIDEO_PARAMS_FIELD = 'video params'
|
||||
@@ -108,11 +108,11 @@ class VLiveIE(InfoExtractor):
|
||||
|
||||
def _live(self, video_id, webpage):
|
||||
init_page = self._download_webpage(
|
||||
'http://www.vlive.tv/video/init/view',
|
||||
'https://www.vlive.tv/video/init/view',
|
||||
video_id, note='Downloading live webpage',
|
||||
data=urlencode_postdata({'videoSeq': video_id}),
|
||||
headers={
|
||||
'Referer': 'http://www.vlive.tv/video/%s' % video_id,
|
||||
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
|
||||
|
@@ -19,7 +19,6 @@ class WatIE(InfoExtractor):
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||
'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
|
||||
'info_dict': {
|
||||
'id': '11713067',
|
||||
'ext': 'mp4',
|
||||
@@ -28,10 +27,15 @@ class WatIE(InfoExtractor):
|
||||
'upload_date': '20140819',
|
||||
'duration': 120,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html',
|
||||
'md5': '34bdfa5ca9fd3c7eb88601b635b0424c',
|
||||
'md5': 'b16574df2c3cd1a36ca0098f2a791925',
|
||||
'info_dict': {
|
||||
'id': '11713075',
|
||||
'ext': 'mp4',
|
||||
@@ -98,38 +102,25 @@ class WatIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
try:
|
||||
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||
m3u8_url = manifest_urls.get('hls')
|
||||
if m3u8_url:
|
||||
m3u8_url = remove_bitrate_limit(m3u8_url)
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
for m3u8_alt_url in alt_urls(m3u8_url):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_alt_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
||||
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr')
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
format_id = m3u8_format['format_id'].replace('hls', 'http')
|
||||
fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url)
|
||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': fmt_url,
|
||||
'format_id': format_id,
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
mpd_url = manifest_urls.get('mpd')
|
||||
if mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(remove_bitrate_limit(
|
||||
mpd_url), video_id, mpd_id='dash', fatal=False))
|
||||
mpd_url = remove_bitrate_limit(mpd_url)
|
||||
for mpd_alt_url in alt_urls(mpd_url):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
except ExtractorError:
|
||||
abr = 64
|
||||
|
@@ -67,11 +67,12 @@ class WatchBoxIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
source = self._parse_json(
|
||||
source = (self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)source["\']?\s*:\s*({.+?})\s*[,}]', webpage, 'source',
|
||||
r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False) or {}
|
||||
video_id, transform_source=js_to_json,
|
||||
fatal=False) or {}).get('source') or {}
|
||||
|
||||
video_id = compat_str(source.get('videoId') or video_id)
|
||||
|
||||
|
@@ -36,7 +36,8 @@ class WimpIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
youtube_id = self._search_regex(
|
||||
r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
|
||||
(r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
|
||||
r'data-id=["\']([0-9A-Za-z_-]{11})'),
|
||||
webpage, 'video URL', default=None)
|
||||
if youtube_id:
|
||||
return {
|
||||
|
@@ -13,6 +13,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -137,7 +138,8 @@ class XHamsterIE(InfoExtractor):
|
||||
else:
|
||||
format_url = format_item
|
||||
filesize = None
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, quality),
|
||||
@@ -198,7 +200,8 @@ class XHamsterIE(InfoExtractor):
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
for format_id, format_url in sources.items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_url in format_urls:
|
||||
continue
|
||||
|
@@ -4,12 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -80,9 +80,9 @@ class YapFilesIE(InfoExtractor):
|
||||
formats = []
|
||||
for format_id in QUALITIES:
|
||||
is_hd = format_id == 'hd'
|
||||
format_url = playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else ''))
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(playlist.get(
|
||||
'file%s' % ('_hd' if is_hd else '')))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user