mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-15 20:58:36 +09:00
Compare commits
114 Commits
2016.05.16
...
2016.05.30
Author | SHA1 | Date | |
---|---|---|---|
![]() |
8267423652 | ||
![]() |
917a3196f8 | ||
![]() |
56bd028a0f | ||
![]() |
681b923b5c | ||
![]() |
9ed6d8c6c5 | ||
![]() |
f3fb420b82 | ||
![]() |
165e3561e9 | ||
![]() |
27f17c0eab | ||
![]() |
44c8892369 | ||
![]() |
f574103d7c | ||
![]() |
6d138e98e3 | ||
![]() |
2a329110b9 | ||
![]() |
2bee7b25f3 | ||
![]() |
92cf872a48 | ||
![]() |
6461f2b7ec | ||
![]() |
807cf7b07f | ||
![]() |
de7d76af52 | ||
![]() |
11c70deba7 | ||
![]() |
f36532404d | ||
![]() |
77b8b4e696 | ||
![]() |
2615fa7584 | ||
![]() |
fac2af3c51 | ||
![]() |
6f8cb24219 | ||
![]() |
448bb5f333 | ||
![]() |
293c255688 | ||
![]() |
ac88d2316e | ||
![]() |
5950cb1d6d | ||
![]() |
761052db92 | ||
![]() |
240b60453e | ||
![]() |
85b0fe7d64 | ||
![]() |
0a5685b26f | ||
![]() |
6f748df43f | ||
![]() |
b410cb83d4 | ||
![]() |
da9d82840a | ||
![]() |
4ee0b8afdb | ||
![]() |
1de32771e1 | ||
![]() |
688c634b7d | ||
![]() |
0d6ee97508 | ||
![]() |
6b43132ce9 | ||
![]() |
a4690b3244 | ||
![]() |
444417edb5 | ||
![]() |
277c7465f5 | ||
![]() |
25bcd3550e | ||
![]() |
a4760d204f | ||
![]() |
e8593f346a | ||
![]() |
05b651e3a5 | ||
![]() |
42a7439717 | ||
![]() |
b1e9ebd080 | ||
![]() |
0c50eeb987 | ||
![]() |
4b464a6a78 | ||
![]() |
5db9df622f | ||
![]() |
5181759c0d | ||
![]() |
e54373204a | ||
![]() |
102810ef04 | ||
![]() |
78d3b3e213 | ||
![]() |
7a46542f97 | ||
![]() |
eb7941e3e6 | ||
![]() |
db3b8b2103 | ||
![]() |
c5f5155100 | ||
![]() |
4a12077855 | ||
![]() |
a4a7c44bd3 | ||
![]() |
70346165fe | ||
![]() |
c776b99691 | ||
![]() |
e9297256d4 | ||
![]() |
e5871c672b | ||
![]() |
9b06b0fb92 | ||
![]() |
4f3a25c2b4 | ||
![]() |
21a19aa94d | ||
![]() |
c6b9cf05e1 | ||
![]() |
4d8819d249 | ||
![]() |
898f4b49cc | ||
![]() |
0150a00f33 | ||
![]() |
c8831015f4 | ||
![]() |
92d221ad48 | ||
![]() |
0db9a05f88 | ||
![]() |
e03b35b8f9 | ||
![]() |
d2fee3c99e | ||
![]() |
598869afb1 | ||
![]() |
7e642e4fd6 | ||
![]() |
c8cc3745fb | ||
![]() |
4c718d3c50 | ||
![]() |
115c65793a | ||
![]() |
661d46b28f | ||
![]() |
5ce3d5bd1b | ||
![]() |
612b5f403e | ||
![]() |
9f54e692d2 | ||
![]() |
7b2fcbfd4e | ||
![]() |
16da9bbc29 | ||
![]() |
c8602b2f9b | ||
![]() |
b219f5e51b | ||
![]() |
1846e9ade0 | ||
![]() |
6756602be6 | ||
![]() |
6c114b1210 | ||
![]() |
7ded6545ed | ||
![]() |
aa5957ac49 | ||
![]() |
64413f7563 | ||
![]() |
45f160a43c | ||
![]() |
36ca2c55db | ||
![]() |
f0c96af9cb | ||
![]() |
31a70191e7 | ||
![]() |
ad96b4c8f5 | ||
![]() |
043dc9d36f | ||
![]() |
52f7c75cff | ||
![]() |
f6e588afc0 | ||
![]() |
a001296703 | ||
![]() |
2cbd8c6781 | ||
![]() |
8585dc4cdc | ||
![]() |
dd81769c62 | ||
![]() |
46bc9b7d7c | ||
![]() |
b78531a36a | ||
![]() |
11e6a0b641 | ||
![]() |
15cda1ef77 | ||
![]() |
055f0d3d06 | ||
![]() |
cdd94c2eae |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.30*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.30**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2016.05.16
|
||||
[debug] youtube-dl version 2016.05.30
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
6
.gitignore
vendored
6
.gitignore
vendored
@@ -28,12 +28,16 @@ updates_key.pem
|
||||
*.mp4
|
||||
*.m4a
|
||||
*.m4v
|
||||
*.mp3
|
||||
*.part
|
||||
*.swp
|
||||
test/testdata
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
|
||||
# IntelliJ related files
|
||||
.idea
|
||||
.idea/*
|
||||
*.iml
|
||||
|
||||
tmp/
|
||||
|
4
Makefile
4
Makefile
@@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi *.mkv *.webm CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
@@ -69,7 +69,7 @@ README.txt: README.md
|
||||
pandoc -f markdown -t plain README.md -o README.txt
|
||||
|
||||
youtube-dl.1: README.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py >youtube-dl.1.temp.md
|
||||
$(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
|
||||
pandoc -s -f markdown -t man youtube-dl.1.temp.md -o youtube-dl.1
|
||||
rm -f youtube-dl.1.temp.md
|
||||
|
||||
|
27
README.md
27
README.md
@@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
|
||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
|
||||
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29).
|
||||
Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
|
||||
|
||||
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
|
||||
|
||||
@@ -73,8 +73,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file /etc
|
||||
/youtube-dl.conf: Do not read the user
|
||||
in the global configuration file
|
||||
/etc/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
@@ -256,11 +256,12 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default $XDG_CACHE_HOME
|
||||
/youtube-dl or ~/.cache/youtube-dl . At the
|
||||
moment, only YouTube player files (for
|
||||
videos with obfuscated signatures) are
|
||||
cached, but that may change.
|
||||
permanently. By default
|
||||
$XDG_CACHE_HOME/youtube-dl or
|
||||
~/.cache/youtube-dl . At the moment, only
|
||||
YouTube player files (for videos with
|
||||
obfuscated signatures) are cached, but that
|
||||
may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
@@ -433,7 +434,7 @@ You can use `--ignore-config` if you want to disable the configuration file for
|
||||
|
||||
### Authentication with `.netrc` file
|
||||
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
|
||||
```
|
||||
touch $HOME/.netrc
|
||||
chmod a-rwx,u+rw $HOME/.netrc
|
||||
@@ -693,6 +694,10 @@ hash -r
|
||||
|
||||
Again, from then on you'll be able to update with `sudo youtube-dl -U`.
|
||||
|
||||
### youtube-dl is extremely slow to start on Windows
|
||||
|
||||
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
|
||||
|
||||
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
|
||||
|
||||
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
|
||||
@@ -780,9 +785,9 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
|
||||
|
||||
Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
|
||||
|
||||
### The exe throws a *Runtime error from Visual C++*
|
||||
### The exe throws an error due to missing `MSVCR100.dll`
|
||||
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2008 Redistributable Package](http://www.microsoft.com/en-us/download/details.aspx?id=29).
|
||||
To run the exe you need to install first the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
|
||||
|
||||
### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
|
||||
|
||||
|
@@ -1,17 +1,42 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from socketserver import ThreadingMixIn
|
||||
import argparse
|
||||
import ctypes
|
||||
import functools
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import traceback
|
||||
import os.path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
|
||||
from youtube_dl.compat import (
|
||||
compat_http_server,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
|
||||
# These are not used outside of buildserver.py thus not in compat.py
|
||||
|
||||
try:
|
||||
import winreg as compat_winreg
|
||||
except ImportError: # Python 2
|
||||
import _winreg as compat_winreg
|
||||
|
||||
try:
|
||||
import socketserver as compat_socketserver
|
||||
except ImportError: # Python 2
|
||||
import SocketServer as compat_socketserver
|
||||
|
||||
try:
|
||||
compat_input = raw_input
|
||||
except NameError: # Python 3
|
||||
compat_input = input
|
||||
|
||||
|
||||
class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
|
||||
allow_reuse_address = True
|
||||
|
||||
|
||||
@@ -191,7 +216,7 @@ def main(args=None):
|
||||
action='store_const', dest='action', const='service',
|
||||
help='Run as a Windows service')
|
||||
parser.add_argument('-b', '--bind', metavar='<host:port>',
|
||||
action='store', default='localhost:8142',
|
||||
action='store', default='0.0.0.0:8142',
|
||||
help='Bind to host:port (default %default)')
|
||||
options = parser.parse_args(args=args)
|
||||
|
||||
@@ -216,7 +241,7 @@ def main(args=None):
|
||||
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
|
||||
thr = threading.Thread(target=srv.serve_forever)
|
||||
thr.start()
|
||||
input('Press ENTER to shut down')
|
||||
compat_input('Press ENTER to shut down')
|
||||
srv.shutdown()
|
||||
thr.join()
|
||||
|
||||
@@ -231,8 +256,6 @@ def rmtree(path):
|
||||
os.remove(fname)
|
||||
os.rmdir(path)
|
||||
|
||||
#==============================================================================
|
||||
|
||||
|
||||
class BuildError(Exception):
|
||||
def __init__(self, output, code=500):
|
||||
@@ -249,15 +272,25 @@ class HTTPError(BuildError):
|
||||
|
||||
class PythonBuilder(object):
|
||||
def __init__(self, **kwargs):
|
||||
pythonVersion = kwargs.pop('python', '2.7')
|
||||
try:
|
||||
key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
|
||||
python_version = kwargs.pop('python', '3.4')
|
||||
python_path = None
|
||||
for node in ('Wow6432Node\\', ''):
|
||||
try:
|
||||
self.pythonPath, _ = _winreg.QueryValueEx(key, '')
|
||||
finally:
|
||||
_winreg.CloseKey(key)
|
||||
except Exception:
|
||||
raise BuildError('No such Python version: %s' % pythonVersion)
|
||||
key = compat_winreg.OpenKey(
|
||||
compat_winreg.HKEY_LOCAL_MACHINE,
|
||||
r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
|
||||
try:
|
||||
python_path, _ = compat_winreg.QueryValueEx(key, '')
|
||||
finally:
|
||||
compat_winreg.CloseKey(key)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not python_path:
|
||||
raise BuildError('No such Python version: %s' % python_version)
|
||||
|
||||
self.pythonPath = python_path
|
||||
|
||||
super(PythonBuilder, self).__init__(**kwargs)
|
||||
|
||||
@@ -305,8 +338,10 @@ class YoutubeDLBuilder(object):
|
||||
|
||||
def build(self):
|
||||
try:
|
||||
subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
cwd=self.buildPath)
|
||||
proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
|
||||
proc.wait()
|
||||
#subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
|
||||
# cwd=self.buildPath)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise BuildError(e.output)
|
||||
|
||||
@@ -369,12 +404,12 @@ class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, Clea
|
||||
pass
|
||||
|
||||
|
||||
class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching.
|
||||
|
||||
def do_GET(self):
|
||||
path = urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
|
||||
path = compat_urlparse.urlparse(self.path)
|
||||
paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
|
||||
action, _, path = path.path.strip('/').partition('/')
|
||||
if path:
|
||||
path = path.split('/')
|
||||
@@ -388,7 +423,7 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
builder.close()
|
||||
except BuildError as e:
|
||||
self.send_response(e.code)
|
||||
msg = unicode(e).encode('UTF-8')
|
||||
msg = compat_str(e).encode('UTF-8')
|
||||
self.send_header('Content-Type', 'text/plain; charset=UTF-8')
|
||||
self.send_header('Content-Length', len(msg))
|
||||
self.end_headers()
|
||||
@@ -400,7 +435,5 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
|
||||
else:
|
||||
self.send_response(500, 'Malformed URL')
|
||||
|
||||
#==============================================================================
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -1,13 +1,46 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import io
|
||||
import optparse
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
README_FILE = os.path.join(ROOT_DIR, 'README.md')
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
|
||||
options, args = parser.parse_args()
|
||||
if len(args) != 1:
|
||||
parser.error('Expected an output filename')
|
||||
|
||||
outfile, = args
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
with io.open(outfile, 'w', encoding='utf-8') as outf:
|
||||
outf.write(readme)
|
||||
|
||||
|
||||
def filter_options(readme):
|
||||
ret = ''
|
||||
@@ -37,27 +70,5 @@ def filter_options(readme):
|
||||
|
||||
return ret
|
||||
|
||||
with io.open(README_FILE, encoding='utf-8') as f:
|
||||
readme = f.read()
|
||||
|
||||
PREFIX = '''%YOUTUBE-DL(1)
|
||||
|
||||
# NAME
|
||||
|
||||
youtube\-dl \- download videos from youtube.com or other video platforms
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**youtube-dl** \[OPTIONS\] URL [URL...]
|
||||
|
||||
'''
|
||||
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
|
||||
readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
|
||||
readme = PREFIX + readme
|
||||
|
||||
readme = filter_options(readme)
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
print(readme.encode('utf-8'))
|
||||
else:
|
||||
print(readme)
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -6,7 +6,7 @@
|
||||
# * the git config user.signingkey is properly set
|
||||
|
||||
# You will need
|
||||
# pip install coverage nose rsa
|
||||
# pip install coverage nose rsa wheel
|
||||
|
||||
# TODO
|
||||
# release notes
|
||||
@@ -15,10 +15,28 @@
|
||||
set -e
|
||||
|
||||
skip_tests=true
|
||||
if [ "$1" = '--run-tests' ]; then
|
||||
skip_tests=false
|
||||
shift
|
||||
fi
|
||||
buildserver='localhost:8142'
|
||||
|
||||
while true
|
||||
do
|
||||
case "$1" in
|
||||
--run-tests)
|
||||
skip_tests=false
|
||||
shift
|
||||
;;
|
||||
--buildserver)
|
||||
buildserver="$2"
|
||||
shift 2
|
||||
;;
|
||||
--*)
|
||||
echo "ERROR: unknown option $1"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
||||
version="$1"
|
||||
@@ -33,6 +51,9 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th
|
||||
useless_files=$(find youtube_dl -type f -not -name '*.py')
|
||||
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
|
||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
||||
|
||||
/bin/echo -e "\n### First of all, testing..."
|
||||
make clean
|
||||
@@ -64,7 +85,7 @@ git push origin "$version"
|
||||
REV=$(git rev-parse HEAD)
|
||||
make youtube-dl youtube-dl.tar.gz
|
||||
read -p "VM running? (y/n) " -n 1
|
||||
wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
|
||||
mkdir -p "build/$version"
|
||||
mv youtube-dl youtube-dl.exe "build/$version"
|
||||
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
|
||||
|
@@ -16,6 +16,8 @@
|
||||
- **9gag**
|
||||
- **abc.net.au**
|
||||
- **Abc7News**
|
||||
- **abcnews**
|
||||
- **abcnews:video**
|
||||
- **AcademicEarth:Course**
|
||||
- **acast**
|
||||
- **acast:channel**
|
||||
@@ -41,8 +43,8 @@
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **ARD:mediathek**
|
||||
- **ARD:mediathek**: Saarländischer Rundfunk
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
@@ -104,6 +106,7 @@
|
||||
- **CBCPlayer**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSNews**: CBS News
|
||||
- **CBSNewsLiveVideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
@@ -133,6 +136,7 @@
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralShows**: The Daily Show / The Colbert Report
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
@@ -202,6 +206,7 @@
|
||||
- **exfm**: ex.fm
|
||||
- **ExpoTV**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
- **faz.net**
|
||||
- **fc2**
|
||||
@@ -213,6 +218,7 @@
|
||||
- **Flickr**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **Foxgay**
|
||||
- **FoxNews**: Fox News and Fox Business Video
|
||||
@@ -316,13 +322,14 @@
|
||||
- **la7.tv**
|
||||
- **Laola1Tv**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lemonde**
|
||||
- **LePlaylist**
|
||||
- **LetvCloud**: 乐视云
|
||||
- **Libsyn**
|
||||
- **life**: Life.ru
|
||||
- **life:embed**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
@@ -331,6 +338,7 @@
|
||||
- **livestream**
|
||||
- **livestream:original**
|
||||
- **LnkGo**
|
||||
- **LocalNews8**
|
||||
- **LoveHomePorn**
|
||||
- **lrt.lt**
|
||||
- **lynda**: lynda.com videos
|
||||
@@ -506,6 +514,8 @@
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiocanada**
|
||||
- **RadioCanadaAudioVideo**
|
||||
- **radiofrance**
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
@@ -515,6 +525,7 @@
|
||||
- **RedTube**
|
||||
- **RegioTV**
|
||||
- **Restudy**
|
||||
- **Reuters**
|
||||
- **ReverbNation**
|
||||
- **Revision3**
|
||||
- **RICE**
|
||||
@@ -556,6 +567,7 @@
|
||||
- **ScreenJunkies**
|
||||
- **ScreenwaveMedia**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **Shahid**
|
||||
@@ -675,8 +687,8 @@
|
||||
- **TVCArticle**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **tvp.pl**
|
||||
- **tvp.pl:Series**
|
||||
- **tvp**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlay**: TV3Play and related services
|
||||
- **Tweakers**
|
||||
- **twitch:chapter**
|
||||
@@ -759,7 +771,8 @@
|
||||
- **VuClip**
|
||||
- **vulture.com**
|
||||
- **Walla**
|
||||
- **WashingtonPost**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
- **WatchIndianPorn**: Watch Indian Porn
|
||||
- **WDR**
|
||||
|
@@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
|
||||
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
|
||||
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
|
||||
|
||||
def test_compat_etree_fromstring_doctype(self):
|
||||
xml = '''<?xml version="1.0"?>
|
||||
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
|
||||
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
|
||||
compat_etree_fromstring(xml)
|
||||
|
||||
def test_struct_unpack(self):
|
||||
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
|
||||
|
||||
|
@@ -50,6 +50,8 @@ from youtube_dl.utils import (
|
||||
sanitize_path,
|
||||
prepend_extension,
|
||||
replace_extension,
|
||||
remove_start,
|
||||
remove_end,
|
||||
remove_quotes,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
@@ -215,6 +217,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||
|
||||
def test_remove_start(self):
|
||||
self.assertEqual(remove_start(None, 'A - '), None)
|
||||
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||
self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
|
||||
|
||||
def test_remove_end(self):
|
||||
self.assertEqual(remove_end(None, ' - B'), None)
|
||||
self.assertEqual(remove_end('A - B', ' - B'), 'A')
|
||||
self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
|
||||
|
||||
def test_remove_quotes(self):
|
||||
self.assertEqual(remove_quotes(None), None)
|
||||
self.assertEqual(remove_quotes('"'), '"')
|
||||
|
@@ -245,13 +245,20 @@ try:
|
||||
except ImportError: # Python 2.6
|
||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||
|
||||
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
|
||||
class _TreeBuilder(etree.TreeBuilder):
|
||||
def doctype(self, name, pubid, system):
|
||||
pass
|
||||
|
||||
if sys.version_info[0] >= 3:
|
||||
compat_etree_fromstring = xml.etree.ElementTree.fromstring
|
||||
def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
else:
|
||||
# python 2.x tries to encode unicode strings with ascii (see the
|
||||
# XMLParser._fixtext method)
|
||||
etree = xml.etree.ElementTree
|
||||
|
||||
try:
|
||||
_etree_iter = etree.Element.iter
|
||||
except AttributeError: # Python <=2.6
|
||||
@@ -265,7 +272,7 @@ else:
|
||||
# 2.7 source
|
||||
def _XML(text, parser=None):
|
||||
if not parser:
|
||||
parser = etree.XMLParser(target=etree.TreeBuilder())
|
||||
parser = etree.XMLParser(target=_TreeBuilder())
|
||||
parser.feed(text)
|
||||
return parser.close()
|
||||
|
||||
@@ -277,7 +284,7 @@ else:
|
||||
return el
|
||||
|
||||
def compat_etree_fromstring(text):
|
||||
doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
|
||||
doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
|
||||
for el in _etree_iter(doc):
|
||||
if el.text is not None and isinstance(el.text, bytes):
|
||||
el.text = el.text.decode('utf-8')
|
||||
|
@@ -319,7 +319,7 @@ class F4mFD(FragmentFD):
|
||||
doc = compat_etree_fromstring(manifest)
|
||||
formats = [(int(f.attrib.get('bitrate', -1)), f)
|
||||
for f in self._get_unencrypted_media(doc)]
|
||||
if requested_bitrate is None:
|
||||
if requested_bitrate is None or len(formats) == 1:
|
||||
# get the best format
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
|
135
youtube_dl/extractor/abcnews.py
Normal file
135
youtube_dl/extractor/abcnews.py
Normal file
@@ -0,0 +1,135 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import re
|
||||
import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = 'http://abcnews.go.com/[^/]+/video/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
|
||||
'info_dict': {
|
||||
'id': '20411932',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'week-exclusive-irans-foreign-minister-zarif',
|
||||
'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
|
||||
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||
'duration': 180,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
info_dict = self._extract_feed_info(
|
||||
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
|
||||
class AbcNewsIE(InfoExtractor):
|
||||
IE_NAME = 'abcnews'
|
||||
_VALID_URL = 'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
'info_dict': {
|
||||
'id': '10498713',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20100428',
|
||||
'timestamp': 1272412800,
|
||||
},
|
||||
'add_ie': ['AbcNewsVideo'],
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '39125818',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||
'upload_date': '20160515',
|
||||
'timestamp': 1463329500,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
# The embedded YouTube video is blocked due to copyright issues
|
||||
'playlist_items': '1',
|
||||
},
|
||||
'add_ie': ['AbcNewsVideo'],
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
||||
youtube_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src="(https://www\.youtube\.com/embed/[^"]+)"',
|
||||
webpage, 'YouTube URL', default=None)
|
||||
|
||||
timestamp = None
|
||||
date_str = self._html_search_regex(
|
||||
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
||||
webpage, 'timestamp', fatal=False)
|
||||
if date_str:
|
||||
tz_offset = 0
|
||||
if date_str.endswith(' ET'): # Eastern Time
|
||||
tz_offset = -5
|
||||
date_str = date_str[:-3]
|
||||
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
||||
for date_format in date_formats:
|
||||
try:
|
||||
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
||||
except ValueError:
|
||||
continue
|
||||
if timestamp is not None:
|
||||
timestamp -= tz_offset * 3600
|
||||
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
'url': full_video_url,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
if youtube_url:
|
||||
entries = [entry, self.url_result(youtube_url, 'Youtube')]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
return entry
|
@@ -52,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
for media_data in media_content:
|
||||
media = media_data['@attributes']
|
||||
media_type = media['type']
|
||||
if media_type == 'video/f4m':
|
||||
if media_type in ('video/f4m', 'application/f4m+xml'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media['url'] + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
@@ -61,7 +61,7 @@ class AMPIE(InfoExtractor):
|
||||
media['url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data['media-category']['@attributes']['label'],
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
|
224
youtube_dl/extractor/anvato.py
Normal file
224
youtube_dl/extractor/anvato.py
Normal file
@@ -0,0 +1,224 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
def md5_text(s):
|
||||
if not isinstance(s, compat_str):
|
||||
s = compat_str(s)
|
||||
return hashlib.md5(s.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
|
||||
'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
|
||||
'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
|
||||
'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
|
||||
'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
|
||||
'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
|
||||
'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
|
||||
'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
|
||||
'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
|
||||
'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
|
||||
'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
|
||||
'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
|
||||
'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
|
||||
'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
|
||||
'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
|
||||
'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
|
||||
'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
|
||||
'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
|
||||
'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
|
||||
'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
|
||||
'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
|
||||
'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
|
||||
'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
|
||||
'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
|
||||
'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
|
||||
'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
|
||||
'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
|
||||
'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
|
||||
'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
|
||||
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
|
||||
'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
|
||||
'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
|
||||
'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
|
||||
'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
|
||||
'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
|
||||
'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
|
||||
'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
|
||||
'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
|
||||
'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
|
||||
'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
|
||||
'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
|
||||
'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
|
||||
'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
|
||||
'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
|
||||
'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
|
||||
'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
|
||||
'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
|
||||
'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
|
||||
'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
|
||||
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||
'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
|
||||
'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
|
||||
'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
|
||||
'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
|
||||
'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
|
||||
'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
|
||||
'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
|
||||
'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
|
||||
'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
|
||||
'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
|
||||
'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
|
||||
'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
|
||||
'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
|
||||
'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
|
||||
'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
|
||||
'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
|
||||
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
|
||||
'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
|
||||
'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
|
||||
'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
|
||||
'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
|
||||
'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
|
||||
'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
|
||||
'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||
'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
|
||||
'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
|
||||
'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
|
||||
'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
|
||||
'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
|
||||
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
||||
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
||||
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||
}
|
||||
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
|
||||
def _server_time(self, access_key, video_id):
|
||||
if self.__server_time is not None:
|
||||
return self.__server_time
|
||||
|
||||
self.__server_time = int(self._download_json(
|
||||
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
|
||||
note='Fetching server time')['server_time'])
|
||||
|
||||
return self.__server_time
|
||||
|
||||
def _api_prefix(self, access_key):
|
||||
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
|
||||
|
||||
def _get_video_json(self, access_key, video_id):
|
||||
# See et() in anvplayer.min.js, which is an alias of getVideoJSON()
|
||||
video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
|
||||
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
|
||||
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
|
||||
payload = {
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
|
||||
'anvts': server_time,
|
||||
},
|
||||
}
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
data=json.dumps(payload).encode('utf-8'))
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
|
||||
video_id = anvplayer_data['video']
|
||||
access_key = anvplayer_data['accessKey']
|
||||
|
||||
video_data = self._get_video_json(access_key, video_id)
|
||||
|
||||
formats = []
|
||||
for published_url in video_data['published_urls']:
|
||||
video_url = published_url['embed_url']
|
||||
ext = determine_ext(video_url)
|
||||
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(video_url, video_id))
|
||||
continue
|
||||
|
||||
tbr = int_or_none(published_url.get('kbps'))
|
||||
a_format = {
|
||||
'url': video_url,
|
||||
'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
|
||||
'tbr': tbr if tbr != 0 else None,
|
||||
}
|
||||
|
||||
if ext == 'm3u8':
|
||||
# Not using _extract_m3u8_formats here as individual media
|
||||
# playlists are also included in published_urls.
|
||||
if tbr is None:
|
||||
formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
|
||||
continue
|
||||
else:
|
||||
a_format.update({
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
elif ext == 'mp3':
|
||||
a_format['vcodec'] = 'none'
|
||||
else:
|
||||
a_format.update({
|
||||
'width': int_or_none(published_url.get('width')),
|
||||
'height': int_or_none(published_url.get('height')),
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
'url': caption['url'],
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_data.get('def_title'),
|
||||
'description': video_data.get('def_description'),
|
||||
'categories': video_data.get('categories'),
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '2b68e5851514c20efdff2afc5603b8b4',
|
||||
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
@@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
@@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': compat_str(data['id']),
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
|
@@ -1,34 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_xml_parse_error,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
|
||||
_VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
'md5': '2c301e4dab317596e837c3e7633e7d86',
|
||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
||||
'info_dict': {
|
||||
'id': '1554319',
|
||||
'ext': 'flv',
|
||||
'title': '【金坷垃】金泡沫',
|
||||
'duration': 308313,
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'duration': 308.067,
|
||||
'timestamp': 1398012660,
|
||||
'upload_date': '20140420',
|
||||
'thumbnail': 're:^https?://.+\.jpg',
|
||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||
'timestamp': 1397983878,
|
||||
'uploader': '菊子桑',
|
||||
'uploader_id': '156160',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||
@@ -36,75 +44,110 @@ class BiliBiliIE(InfoExtractor):
|
||||
'id': '1041170',
|
||||
'title': '【BD1080P】刀语【诸神&异域】',
|
||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||
'uploader': '枫叶逝去',
|
||||
'timestamp': 1396501299,
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}]
|
||||
|
||||
# BiliBili blocks keys from time to time. The current key is extracted from
|
||||
# the Android client
|
||||
# TODO: find the sign algorithm used in the flash player
|
||||
_APP_KEY = '86385cdc024c0f6c'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
page_num = mobj.group('page_num') or '1'
|
||||
|
||||
view_data = self._download_json(
|
||||
'http://api.bilibili.com/view?type=json&appkey=8e9fc618fbd41e28&id=%s&page=%s' % (video_id, page_num),
|
||||
video_id)
|
||||
if 'error' in view_data:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, view_data['error']), expected=True)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
cid = view_data['cid']
|
||||
title = unescapeHTML(view_data['title'])
|
||||
params = compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
|
||||
webpage, 'player parameters'))
|
||||
cid = params['cid'][0]
|
||||
|
||||
doc = self._download_xml(
|
||||
'http://interface.bilibili.com/v_cdn_play?appkey=8e9fc618fbd41e28&cid=%s' % cid,
|
||||
cid,
|
||||
'Downloading page %s/%s' % (page_num, view_data['pages'])
|
||||
)
|
||||
info_xml_str = self._download_webpage(
|
||||
'http://interface.bilibili.com/v_cdn_play',
|
||||
cid, query={'appkey': self._APP_KEY, 'cid': cid},
|
||||
note='Downloading video info page')
|
||||
|
||||
if xpath_text(doc, './result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, xpath_text(doc, './message')), expected=True)
|
||||
err_msg = None
|
||||
durls = None
|
||||
info_xml = None
|
||||
try:
|
||||
info_xml = compat_etree_fromstring(info_xml_str.encode('utf-8'))
|
||||
except compat_xml_parse_error:
|
||||
info_json = self._parse_json(info_xml_str, video_id, fatal=False)
|
||||
err_msg = (info_json or {}).get('error_text')
|
||||
else:
|
||||
err_msg = xpath_text(info_xml, './message')
|
||||
|
||||
if info_xml is not None:
|
||||
durls = info_xml.findall('./durl')
|
||||
if not durls:
|
||||
if err_msg:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, err_msg), expected=True)
|
||||
else:
|
||||
raise ExtractorError('No videos found!')
|
||||
|
||||
entries = []
|
||||
|
||||
for durl in doc.findall('./durl'):
|
||||
for durl in durls:
|
||||
size = xpath_text(durl, ['./filesize', './size'])
|
||||
formats = [{
|
||||
'url': durl.find('./url').text,
|
||||
'filesize': int_or_none(size),
|
||||
'ext': 'flv',
|
||||
}]
|
||||
backup_urls = durl.find('./backup_url')
|
||||
if backup_urls is not None:
|
||||
for backup_url in backup_urls.findall('./url'):
|
||||
formats.append({'url': backup_url.text})
|
||||
formats.reverse()
|
||||
for backup_url in durl.findall('./backup_url/url'):
|
||||
formats.append({
|
||||
'url': backup_url.text,
|
||||
# backup URLs have lower priorities
|
||||
'preference': -2 if 'hd.mp4' in backup_url.text else -3,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
'id': '%s_part%s' % (cid, xpath_text(durl, './order')),
|
||||
'title': title,
|
||||
'duration': int_or_none(xpath_text(durl, './length'), 1000),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
|
||||
description = self._html_search_meta('description', webpage)
|
||||
datetime_str = self._html_search_regex(
|
||||
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', fatal=False)
|
||||
if datetime_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M').timetuple())
|
||||
|
||||
# TODO 'view_count' requires deobfuscating Javascript
|
||||
info = {
|
||||
'id': compat_str(cid),
|
||||
'title': title,
|
||||
'description': view_data.get('description'),
|
||||
'thumbnail': view_data.get('pic'),
|
||||
'uploader': view_data.get('author'),
|
||||
'timestamp': int_or_none(view_data.get('created')),
|
||||
'view_count': int_or_none(view_data.get('play')),
|
||||
'duration': int_or_none(xpath_text(doc, './timelength')),
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
|
||||
'duration': float_or_none(xpath_text(info_xml, './timelength'), scale=1000),
|
||||
}
|
||||
|
||||
uploader_mobj = re.search(
|
||||
r'<a[^>]+href="https?://space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
|
||||
webpage)
|
||||
if uploader_mobj:
|
||||
info.update({
|
||||
'uploader': uploader_mobj.group('name'),
|
||||
'uploader_id': uploader_mobj.group('id'),
|
||||
})
|
||||
|
||||
for entry in entries:
|
||||
entry.update(info)
|
||||
|
||||
if len(entries) == 1:
|
||||
entries[0].update(info)
|
||||
return entries[0]
|
||||
else:
|
||||
info.update({
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'entries': entries,
|
||||
})
|
||||
return info
|
||||
}
|
||||
|
@@ -444,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
# non numeric ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# unavailable video without message but with error_code
|
||||
'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -514,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)
|
||||
raise ExtractorError(json_data[0]['message'], expected=True)
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
raise ExtractorError(
|
||||
json_data.get('message') or json_data['error_code'], expected=True)
|
||||
raise
|
||||
|
||||
title = json_data['name'].strip()
|
||||
|
@@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
|
||||
'md5': '05850eb8c749e2ee05ad5a1c34668493',
|
||||
'info_dict': {
|
||||
'id': 'studio-c-season-5-episode-5',
|
||||
'ext': 'mp4',
|
||||
@@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -4,65 +4,66 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# with mediaId
|
||||
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
|
||||
'md5': '97e24d09672fc4cf56256d6faa6c25bc',
|
||||
'info_dict': {
|
||||
'id': '2682904050',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Don Cherry – All-Stars',
|
||||
'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
|
||||
'timestamp': 1454475540,
|
||||
'timestamp': 1454463000,
|
||||
'upload_date': '20160203',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
# with clipId
|
||||
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
|
||||
'md5': '0274a90b51a9b4971fe005c63f592f12',
|
||||
'info_dict': {
|
||||
'id': '2487345465',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Robin Williams freestyles on 90 Minutes Live',
|
||||
'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '19780210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
'timestamp': 255977160,
|
||||
},
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
'playlist': [{
|
||||
'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
|
||||
'info_dict': {
|
||||
'id': '2680832926',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
|
||||
'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20160201',
|
||||
'timestamp': 1454342820,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
'md5': '415a0e3f586113894174dfb31aa5bb1a',
|
||||
'info_dict': {
|
||||
'id': '2658915080',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fly like an eagle!',
|
||||
'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
|
||||
'upload_date': '19700101',
|
||||
'upload_date': '20150315',
|
||||
'timestamp': 1426443984,
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -91,24 +92,54 @@ class CBCIE(InfoExtractor):
|
||||
|
||||
class CBCPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||
'info_dict': {
|
||||
'id': '2683190193',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gerry Runs a Sweat Shop',
|
||||
'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
|
||||
'timestamp': 1455067800,
|
||||
'timestamp': 1455071400,
|
||||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||
'info_dict': {
|
||||
'id': '2657631896',
|
||||
'ext': 'mp3',
|
||||
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
'md5': '17a61eb813539abea40618d6323a7f82',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'flv',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id,
|
||||
'ThePlatformFeed', video_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(
|
||||
'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
@@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
|
||||
|
||||
|
||||
class CBSIE(CBSBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||
@@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE):
|
||||
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
content_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not content_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
content_id = self._search_regex(
|
||||
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
|
||||
webpage, 'content id')
|
||||
items_data = self._download_xml(
|
||||
'http://can.cbs.com/thunder/player/videoPlayerService.php',
|
||||
content_id, query={'partner': 'cbs', 'contentId': content_id})
|
||||
|
84
youtube_dl/extractor/cbslocal.py
Normal file
84
youtube_dl/extractor/cbslocal.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
|
||||
'md5': 'f0ee3081e3843f575fccef901199b212',
|
||||
'info_dict': {
|
||||
'id': '3401037',
|
||||
'ext': 'mp4',
|
||||
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
|
||||
'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1463440500,
|
||||
'upload_date': '20160516',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\KCBSTV',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\Yahoo',
|
||||
'Syndication\\Tribune',
|
||||
'Syndication\\Curb.tv',
|
||||
'Content\\News'
|
||||
],
|
||||
},
|
||||
}, {
|
||||
# SendtoNews embed
|
||||
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'upload_date': '20160516',
|
||||
'timestamp': 1463433840,
|
||||
'duration': 49,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
sendtonews_url = SendtoNewsIE._extract_url(webpage)
|
||||
if sendtonews_url:
|
||||
info_dict = {
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, sendtonews_url),
|
||||
}
|
||||
else:
|
||||
info_dict = self._extract_anvato_videos(webpage, display_id)
|
||||
|
||||
time_str = self._html_search_regex(
|
||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||
timestamp = None
|
||||
if time_str:
|
||||
timestamp = calendar.timegm(datetime.datetime.strptime(
|
||||
time_str, '%b %d, %Y %I:%M %p').timetuple())
|
||||
|
||||
info_dict.update({
|
||||
'display_id': display_id,
|
||||
'timestamp': timestamp,
|
||||
})
|
||||
|
||||
return info_dict
|
@@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
|
||||
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|
||||
|https?://(:www\.)?
|
||||
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
|
||||
(?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
|
||||
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
|
||||
(?P<clip>
|
||||
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
(?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|
||||
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|
||||
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
|
||||
)|
|
||||
@@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_available_formats = ['3500', '2200', '1700', '1200', '750', '400']
|
||||
|
@@ -987,7 +987,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
manifest = self._download_xml(
|
||||
manifest_url, video_id, 'Downloading f4m manifest',
|
||||
'Unable to download f4m manifest',
|
||||
@@ -1001,11 +1001,11 @@ class InfoExtractor(object):
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
|
||||
|
||||
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=True):
|
||||
fatal=True, m3u8_id=None):
|
||||
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
|
||||
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
|
||||
if akamai_pv is not None and ';' in akamai_pv.text:
|
||||
@@ -1029,9 +1029,26 @@ class InfoExtractor(object):
|
||||
'base URL', default=None)
|
||||
if base_url:
|
||||
base_url = base_url.strip()
|
||||
|
||||
bootstrap_info = xpath_text(
|
||||
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
|
||||
'bootstrap info', default=None)
|
||||
|
||||
for i, media_el in enumerate(media_nodes):
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href') or media_el.attrib.get('url')
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
width = int_or_none(media_el.attrib.get('width'))
|
||||
height = int_or_none(media_el.attrib.get('height'))
|
||||
format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
|
||||
# If <bootstrapInfo> is present, the specified f4m is a
|
||||
# stream-level manifest, and only set-level manifests may refer to
|
||||
# external resources. See section 11.4 and section 4 of F4M spec
|
||||
if bootstrap_info is None:
|
||||
media_url = None
|
||||
# @href is introduced in 2.0, see section 11.6 of F4M spec
|
||||
if manifest_version == '2.0':
|
||||
media_url = media_el.attrib.get('href')
|
||||
if media_url is None:
|
||||
media_url = media_el.attrib.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
manifest_url = (
|
||||
@@ -1041,29 +1058,43 @@ class InfoExtractor(object):
|
||||
# since bitrates in parent manifest (this one) and media_url manifest
|
||||
# may differ leading to inability to resolve the format by requested
|
||||
# bitrate in f4m downloader
|
||||
if determine_ext(manifest_url) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
ext = determine_ext(manifest_url)
|
||||
if ext == 'f4m':
|
||||
f4m_formats = self._extract_f4m_formats(
|
||||
manifest_url, video_id, preference=preference, f4m_id=f4m_id,
|
||||
transform_source=transform_source, fatal=fatal))
|
||||
transform_source=transform_source, fatal=fatal)
|
||||
# Sometimes stream-level manifest contains single media entry that
|
||||
# does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
|
||||
# At the same time parent's media entry in set-level manifest may
|
||||
# contain it. We will copy it from parent in such cases.
|
||||
if len(f4m_formats) == 1:
|
||||
f = f4m_formats[0]
|
||||
f.update({
|
||||
'tbr': f.get('tbr') or tbr,
|
||||
'width': f.get('width') or width,
|
||||
'height': f.get('height') or height,
|
||||
'format_id': f.get('format_id') if not tbr else format_id,
|
||||
})
|
||||
formats.extend(f4m_formats)
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', preference=preference,
|
||||
m3u8_id=m3u8_id, fatal=fatal))
|
||||
continue
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
'format_id': format_id,
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'ext': 'flv' if bootstrap_info else None,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(media_el.attrib.get('width')),
|
||||
'height': int_or_none(media_el.attrib.get('height')),
|
||||
'width': width,
|
||||
'height': height,
|
||||
'preference': preference,
|
||||
})
|
||||
return formats
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [{
|
||||
def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
|
||||
return {
|
||||
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
|
||||
'url': m3u8_url,
|
||||
'ext': ext,
|
||||
@@ -1071,7 +1102,14 @@ class InfoExtractor(object):
|
||||
'preference': preference - 1 if preference else -1,
|
||||
'resolution': 'multiple',
|
||||
'format_note': 'Quality selection URL',
|
||||
}]
|
||||
}
|
||||
|
||||
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
@@ -1138,7 +1176,7 @@ class InfoExtractor(object):
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None
|
||||
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media_name
|
||||
@@ -1278,21 +1316,21 @@ class InfoExtractor(object):
|
||||
m3u8_count = 0
|
||||
|
||||
srcs = []
|
||||
videos = smil.findall(self._xpath_ns('.//video', namespace))
|
||||
for video in videos:
|
||||
src = video.get('src')
|
||||
media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
|
||||
for medium in media:
|
||||
src = medium.get('src')
|
||||
if not src or src in srcs:
|
||||
continue
|
||||
srcs.append(src)
|
||||
|
||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(video.get('size') or video.get('fileSize'))
|
||||
width = int_or_none(video.get('width'))
|
||||
height = int_or_none(video.get('height'))
|
||||
proto = video.get('proto')
|
||||
ext = video.get('ext')
|
||||
bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
|
||||
filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
|
||||
width = int_or_none(medium.get('width'))
|
||||
height = int_or_none(medium.get('height'))
|
||||
proto = medium.get('proto')
|
||||
ext = medium.get('ext')
|
||||
src_ext = determine_ext(src)
|
||||
streamer = video.get('streamer') or base
|
||||
streamer = medium.get('streamer') or base
|
||||
|
||||
if proto == 'rtmp' or streamer.startswith('rtmp'):
|
||||
rtmp_count += 1
|
||||
|
143
youtube_dl/extractor/coub.py
Normal file
143
youtube_dl/extractor/coub.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class CoubIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:coub:|https?://(?:coub\.com/(?:view|embed|coubs)/|c-cdn\.coub\.com/fb-player\.swf\?.*\bcoub(?:ID|id)=))(?P<id>[\da-z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://coub.com/view/5u5n1',
|
||||
'info_dict': {
|
||||
'id': '5u5n1',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Matrix Moonwalk',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 4.6,
|
||||
'timestamp': 1428527772,
|
||||
'upload_date': '20150408',
|
||||
'uploader': 'Артём Лоскутников',
|
||||
'uploader_id': 'artyom.loskutnikov',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://c-cdn.coub.com/fb-player.swf?bot_type=vk&coubID=7w5a4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'coub:5u5n1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# longer video id
|
||||
'url': 'http://coub.com/view/237d5l5h',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
coub = self._download_json(
|
||||
'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id)
|
||||
|
||||
if coub.get('error'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, coub['error']), expected=True)
|
||||
|
||||
title = coub['title']
|
||||
|
||||
file_versions = coub['file_versions']
|
||||
|
||||
QUALITIES = ('low', 'med', 'high')
|
||||
|
||||
MOBILE = 'mobile'
|
||||
IPHONE = 'iphone'
|
||||
HTML5 = 'html5'
|
||||
|
||||
SOURCE_PREFERENCE = (MOBILE, IPHONE, HTML5)
|
||||
|
||||
quality_key = qualities(QUALITIES)
|
||||
preference_key = qualities(SOURCE_PREFERENCE)
|
||||
|
||||
formats = []
|
||||
|
||||
for kind, items in file_versions.get(HTML5, {}).items():
|
||||
if kind not in ('video', 'audio'):
|
||||
continue
|
||||
if not isinstance(items, dict):
|
||||
continue
|
||||
for quality, item in items.items():
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
item_url = item.get('url')
|
||||
if not item_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': item_url,
|
||||
'format_id': '%s-%s-%s' % (HTML5, kind, quality),
|
||||
'filesize': int_or_none(item.get('size')),
|
||||
'vcodec': 'none' if kind == 'audio' else None,
|
||||
'quality': quality_key(quality),
|
||||
'preference': preference_key(HTML5),
|
||||
})
|
||||
|
||||
iphone_url = file_versions.get(IPHONE, {}).get('url')
|
||||
if iphone_url:
|
||||
formats.append({
|
||||
'url': iphone_url,
|
||||
'format_id': IPHONE,
|
||||
'preference': preference_key(IPHONE),
|
||||
})
|
||||
|
||||
mobile_url = file_versions.get(MOBILE, {}).get('audio_url')
|
||||
if mobile_url:
|
||||
formats.append({
|
||||
'url': mobile_url,
|
||||
'format_id': '%s-audio' % MOBILE,
|
||||
'preference': preference_key(MOBILE),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = coub.get('picture')
|
||||
duration = float_or_none(coub.get('duration'))
|
||||
timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
|
||||
uploader = coub.get('channel', {}).get('title')
|
||||
uploader_id = coub.get('channel', {}).get('permalink')
|
||||
|
||||
view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
|
||||
like_count = int_or_none(coub.get('likes_count'))
|
||||
repost_count = int_or_none(coub.get('recoubs_count'))
|
||||
comment_count = int_or_none(coub.get('comments_count'))
|
||||
|
||||
age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
|
||||
if age_restricted is not None:
|
||||
age_limit = 18 if age_restricted is True else 0
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'repost_count': repost_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
@@ -2,13 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
IE_NAME = 'dw'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
|
||||
@@ -31,6 +34,16 @@ class DWIE(InfoExtractor):
|
||||
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
|
||||
'upload_date': '20160311',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798',
|
||||
'md5': '56b6214ef463bfb9a3b71aeb886f3cf1',
|
||||
'info_dict': {
|
||||
'id': '19274438',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the 90s – Hip Hop',
|
||||
'description': 'Welcome to the 90s - The Golden Decade of Hip Hop',
|
||||
'upload_date': '20160521',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -38,6 +51,7 @@ class DWIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
title = hidden_inputs['media_title']
|
||||
media_id = hidden_inputs.get('media_id') or media_id
|
||||
|
||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
||||
formats = self._extract_smil_formats(
|
||||
@@ -49,13 +63,20 @@ class DWIE(InfoExtractor):
|
||||
else:
|
||||
formats = [{'url': hidden_inputs['file_name']}]
|
||||
|
||||
upload_date = hidden_inputs.get('display_date')
|
||||
if not upload_date:
|
||||
upload_date = self._html_search_regex(
|
||||
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
||||
'upload date', default=None)
|
||||
upload_date = unified_strdate(upload_date)
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': hidden_inputs.get('preview_image'),
|
||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||
'upload_date': hidden_inputs.get('display_date'),
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
@@ -11,8 +11,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
'info_dict': {
|
||||
@@ -23,8 +23,12 @@ class EpornerIE(InfoExtractor):
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://espn.go.com/video/clip?id=10365079',
|
||||
'md5': '60e5d097a523e767d06479335d1bdc58',
|
||||
'info_dict': {
|
||||
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
|
||||
'ext': 'mp4',
|
||||
@@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
|
||||
'description': None,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
|
||||
'url': 'http://espn.go.com/video/clip?id=2743663',
|
||||
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
|
||||
'info_dict': {
|
||||
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
|
||||
'ext': 'mp4',
|
||||
'title': 'Must-See Moments: Best of the MLS season',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['OoyalaExternal'],
|
||||
}, {
|
||||
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
|
||||
'only_matching': True,
|
||||
|
@@ -3,6 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .abc import ABCIE
|
||||
from .abc7news import Abc7NewsIE
|
||||
from .abcnews import (
|
||||
AbcNewsIE,
|
||||
AbcNewsVideoIE,
|
||||
)
|
||||
from .academicearth import AcademicEarthCourseIE
|
||||
from .acast import (
|
||||
ACastIE,
|
||||
@@ -107,6 +111,7 @@ from .cbc import (
|
||||
CBCPlayerIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsIE,
|
||||
@@ -138,6 +143,7 @@ from .cnn import (
|
||||
CNNBlogsIE,
|
||||
CNNArticleIE,
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .collegerama import CollegeRamaIE
|
||||
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
|
||||
from .comcarcoff import ComCarCoffIE
|
||||
@@ -226,6 +232,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .exfm import ExfmIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import FacebookIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import FC2IE
|
||||
@@ -238,6 +245,7 @@ from .fktv import FKTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
from .fourtube import FourTubeIE
|
||||
from .fox import FOXIE
|
||||
from .foxgay import FoxgayIE
|
||||
@@ -365,6 +373,7 @@ from .kuwo import (
|
||||
)
|
||||
from .la7 import LA7IE
|
||||
from .laola1tv import Laola1TvIE
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lemonde import LemondeIE
|
||||
from .leeco import (
|
||||
@@ -390,6 +399,7 @@ from .livestream import (
|
||||
LivestreamShortenerIE,
|
||||
)
|
||||
from .lnkgo import LnkGoIE
|
||||
from .localnews8 import LocalNews8IE
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import LRTIE
|
||||
from .lynda import (
|
||||
@@ -609,6 +619,10 @@ from .qqmusic import (
|
||||
QQMusicPlaylistIE,
|
||||
)
|
||||
from .r7 import R7IE
|
||||
from .radiocanada import (
|
||||
RadioCanadaIE,
|
||||
RadioCanadaAudioVideoIE,
|
||||
)
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
@@ -622,6 +636,7 @@ from .rds import RDSIE
|
||||
from .redtube import RedTubeIE
|
||||
from .regiotv import RegioTVIE
|
||||
from .restudy import RestudyIE
|
||||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .revision3 import Revision3IE
|
||||
from .rice import RICEIE
|
||||
@@ -663,6 +678,7 @@ from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenjunkies import ScreenJunkiesIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .shahid import ShahidIE
|
||||
@@ -818,7 +834,10 @@ from .tvc import (
|
||||
)
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvp import (
|
||||
TVPIE,
|
||||
TVPSeriesIE,
|
||||
)
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
@@ -932,7 +951,10 @@ from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vulture import VultureIE
|
||||
from .walla import WallaIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
from .washingtonpost import (
|
||||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .watchindianporn import WatchIndianPornIE
|
||||
from .wdr import (
|
||||
|
64
youtube_dl/extractor/eyedotv.py
Normal file
64
youtube_dl/extractor/eyedotv.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class EyedoTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
|
||||
'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
|
||||
'info_dict': {
|
||||
'id': '16301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Journée du conseil scientifique de l\'Afnic 2015',
|
||||
'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
|
||||
'uploader': 'Afnic Live',
|
||||
'uploader_id': '8023',
|
||||
}
|
||||
}
|
||||
_ROOT_URL = 'http://live.eyedo.net:1935/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
|
||||
|
||||
def _add_ns(path):
|
||||
return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
|
||||
|
||||
title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
|
||||
state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True)
|
||||
if state_live_code == 'avenir':
|
||||
raise ExtractorError(
|
||||
'%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
|
||||
expected=True)
|
||||
|
||||
is_live = state_live_code == 'live'
|
||||
m3u8_url = None
|
||||
# http://eyedo.tv/Content/Html5/Scripts/html5view.js
|
||||
if is_live:
|
||||
if xpath_text(video_data, 'Cdn') == 'true':
|
||||
m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
|
||||
else:
|
||||
m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
|
||||
else:
|
||||
m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
|
||||
'description': xpath_text(video_data, _add_ns('Description')),
|
||||
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
|
||||
'uploader': xpath_text(video_data, _add_ns('Createur')),
|
||||
'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
|
||||
'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
|
||||
'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
|
||||
}
|
26
youtube_dl/extractor/formula1.py
Normal file
26
youtube_dl/extractor/formula1.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Formula1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/content/fom-website/en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
||||
'info_dict': {
|
||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
||||
'ext': 'flv',
|
||||
'title': 'Race highlights - Spain 2016',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
ooyala_embed_code = self._search_regex(
|
||||
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
|
||||
return self.url_result(
|
||||
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
|
@@ -62,6 +62,7 @@ from .digiteka import DigitekaIE
|
||||
from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -717,15 +718,18 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
# Wistia embed
|
||||
{
|
||||
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||
'md5': '8788b683c777a5cf25621eaf286d0c23',
|
||||
'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
|
||||
'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
|
||||
'info_dict': {
|
||||
'id': '1cfaf6b7ea',
|
||||
'id': '6e2wtrbdaf',
|
||||
'ext': 'mov',
|
||||
'title': 'md5:51364a8d3d009997ba99656004b5e20d',
|
||||
'duration': 643.0,
|
||||
'filesize': 182808282,
|
||||
'uploader': 'education-portal.com',
|
||||
'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
|
||||
'description': 'a Paywall Videos video from Remilon',
|
||||
'duration': 644.072,
|
||||
'uploader': 'study.com',
|
||||
'timestamp': 1459678540,
|
||||
'upload_date': '20160403',
|
||||
'filesize': 24687186,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -734,14 +738,30 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'uxjb0lwrcz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
|
||||
'title': 'Conversation about Hexagonal Rails Part 1',
|
||||
'description': 'a Martin Fowler video from ThoughtWorks',
|
||||
'duration': 1715.0,
|
||||
'uploader': 'thoughtworks.wistia.com',
|
||||
'upload_date': '20140603',
|
||||
'timestamp': 1401832161,
|
||||
'upload_date': '20140603',
|
||||
},
|
||||
},
|
||||
# Wistia standard embed (async)
|
||||
{
|
||||
'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
|
||||
'info_dict': {
|
||||
'id': '807fafadvk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drip Brennan Dunn Workshop',
|
||||
'description': 'a JV Webinars video from getdrip-1',
|
||||
'duration': 4986.95,
|
||||
'timestamp': 1463607249,
|
||||
'upload_date': '20160518',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Soundcloud embed
|
||||
{
|
||||
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
|
||||
@@ -764,6 +784,19 @@ class GenericIE(InfoExtractor):
|
||||
'title': 'Rosetta #CometLanding webcast HL 10',
|
||||
}
|
||||
},
|
||||
# Another Livestream embed, without 'new.' in URL
|
||||
{
|
||||
'url': 'https://www.freespeech.org/',
|
||||
'info_dict': {
|
||||
'id': '123537347',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
},
|
||||
'params': {
|
||||
# Live stream
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# LazyYT
|
||||
{
|
||||
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
|
||||
@@ -1174,6 +1207,16 @@ class GenericIE(InfoExtractor):
|
||||
'uploader': 'Lake8737',
|
||||
}
|
||||
},
|
||||
# Duplicated embedded video URLs
|
||||
{
|
||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||
'info_dict': {
|
||||
'id': '149298443_480_16c25b74_2',
|
||||
'ext': 'mp4',
|
||||
'title': 'vs. Blue Orange Spring Game',
|
||||
'uploader': 'www.hudl.com',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -1480,6 +1523,11 @@ class GenericIE(InfoExtractor):
|
||||
if bc_urls:
|
||||
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
|
||||
|
||||
# Look for ThePlatform embeds
|
||||
tp_urls = ThePlatformIE._extract_urls(webpage)
|
||||
if tp_urls:
|
||||
return _playlist_from_matches(tp_urls, ie='ThePlatform')
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
|
||||
@@ -1548,21 +1596,26 @@ class GenericIE(InfoExtractor):
|
||||
'url': embed_url,
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
|
||||
if match:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
|
||||
'url': 'wistia:%s' % match.group('id'),
|
||||
'ie_key': 'Wistia',
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'id': match.group('id')
|
||||
}
|
||||
|
||||
match = re.search(
|
||||
r'''(?sx)
|
||||
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
|
||||
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
|
||||
''', webpage)
|
||||
if match:
|
||||
return self.url_result(self._proto_relative_url(
|
||||
'wistia:%s' % match.group('id')), 'Wistia')
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
if svt_url:
|
||||
@@ -1838,7 +1891,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
|
||||
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
|
||||
r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Livestream')
|
||||
@@ -2081,7 +2134,7 @@ class GenericIE(InfoExtractor):
|
||||
raise UnsupportedError(url)
|
||||
|
||||
entries = []
|
||||
for video_url in found:
|
||||
for video_url in orderedSet(found):
|
||||
video_url = unescapeHTML(video_url)
|
||||
video_url = video_url.replace('\\/', '/')
|
||||
video_url = compat_urlparse.urljoin(url, video_url)
|
||||
|
@@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
|
||||
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '42428ce8a00585f9bc36e49226eae7a1',
|
||||
'info_dict': {
|
||||
'id': 'fk6OhWpXgIQ',
|
||||
'ext': 'mp4',
|
||||
@@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
|
||||
'uploader_id': 'groupon',
|
||||
'uploader': 'Groupon',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
_PROVIDERS = {
|
||||
|
@@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
|
||||
'md5': '8b743df908c42f60cf6496586c7f12c3',
|
||||
'md5': '7d45932269a288149483144f01b99789',
|
||||
'info_dict': {
|
||||
'id': '390161',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
|
||||
'duration': 56.823,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -5,33 +5,50 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class JWPlatformBaseIE(InfoExtractor):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True):
|
||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
|
||||
video_data = jwplayer_data['playlist'][0]
|
||||
|
||||
formats = []
|
||||
for source in video_data['sources']:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_type = source.get('type') or ''
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls'):
|
||||
if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', 'm3u8_native', fatal=False))
|
||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||
elif source_type.startswith('audio'):
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
a_format = {
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
}
|
||||
if source_url.startswith('rtmp'):
|
||||
a_format['ext'] = 'flv',
|
||||
|
||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
'url': rtmp_url,
|
||||
'play_path': prefix + play_path,
|
||||
})
|
||||
if rtmp_params:
|
||||
a_format.update(rtmp_params)
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
33
youtube_dl/extractor/learnr.py
Normal file
33
youtube_dl/extractor/learnr.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LearnrIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
|
||||
'md5': '3719fdf0a68397f49899e82c308a89de',
|
||||
'info_dict': {
|
||||
'id': '51624',
|
||||
'ext': 'mp4',
|
||||
'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
|
||||
'description': 'md5:b36dbfa92350176cdf12b4d388485503',
|
||||
'uploader': 'LearnCode.academy',
|
||||
'uploader_id': 'learncodeacademy',
|
||||
'upload_date': '20131021',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self._search_regex(
|
||||
r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
|
||||
'id': video_id,
|
||||
}
|
@@ -7,48 +7,53 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class LifeNewsIE(InfoExtractor):
|
||||
IE_NAME = 'lifenews'
|
||||
IE_DESC = 'LIFE | NEWS'
|
||||
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
|
||||
IE_NAME = 'life'
|
||||
IE_DESC = 'Life.ru'
|
||||
_VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single video embedded via video/source
|
||||
'url': 'http://lifenews.ru/news/98736',
|
||||
'url': 'https://life.ru/t/новости/98736',
|
||||
'md5': '77c95eaefaca216e32a76a343ad89d23',
|
||||
'info_dict': {
|
||||
'id': '98736',
|
||||
'ext': 'mp4',
|
||||
'title': 'Мужчина нашел дома архив оборонного завода',
|
||||
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
|
||||
'timestamp': 1344154740,
|
||||
'upload_date': '20120805',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# single video embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/152125',
|
||||
'url': 'https://life.ru/t/новости/152125',
|
||||
'md5': '77d19a6f0886cd76bdbf44b4d971a273',
|
||||
'info_dict': {
|
||||
'id': '152125',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
|
||||
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
|
||||
'timestamp': 1427961840,
|
||||
'upload_date': '20150402',
|
||||
'view_count': int,
|
||||
}
|
||||
}, {
|
||||
# two videos embedded via iframe
|
||||
'url': 'http://lifenews.ru/news/153461',
|
||||
'url': 'https://life.ru/t/новости/153461',
|
||||
'info_dict': {
|
||||
'id': '153461',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'upload_date': '20150505',
|
||||
'timestamp': 1430825520,
|
||||
'view_count': int,
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
|
||||
@@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'timestamp': 1430825520,
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}, {
|
||||
@@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
|
||||
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
|
||||
'timestamp': 1430825520,
|
||||
'upload_date': '20150505',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'http://lifenews.ru/video/13035',
|
||||
'url': 'https://life.ru/t/новости/213035',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
section = mobj.group('section')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://lifenews.ru/%s/%s' % (section, video_id),
|
||||
video_id, 'Downloading page')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_urls = re.findall(
|
||||
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
|
||||
@@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
|
||||
|
||||
title = remove_end(
|
||||
self._og_search_title(webpage),
|
||||
' - Первый по срочным новостям — LIFE | NEWS')
|
||||
' - Life.ru')
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False)
|
||||
comment_count = self._html_search_regex(
|
||||
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
|
||||
webpage, 'comment count', fatal=False)
|
||||
r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
|
||||
webpage, 'view count', fatal=False, group='value')
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False)
|
||||
if upload_date is not None:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
|
||||
webpage, 'upload date', fatal=False, group='value'))
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'view_count': int_or_none(view_count),
|
||||
'comment_count': int_or_none(comment_count),
|
||||
'upload_date': upload_date,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
def make_entry(video_id, video_url, index=None):
|
||||
@@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='m3u8'))
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='m3u8'))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _extract_stream_info(self, stream_info):
|
||||
broadcast_id = stream_info['broadcast_id']
|
||||
broadcast_id = compat_str(stream_info['broadcast_id'])
|
||||
is_live = stream_info.get('is_live')
|
||||
|
||||
formats = []
|
||||
|
47
youtube_dl/extractor/localnews8.py
Normal file
47
youtube_dl/extractor/localnews8.py
Normal file
@@ -0,0 +1,47 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LocalNews8IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
|
||||
'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
|
||||
'info_dict': {
|
||||
'id': '35183304',
|
||||
'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
|
||||
'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
|
||||
'duration': 153,
|
||||
'timestamp': 1441844822,
|
||||
'upload_date': '20150910',
|
||||
'uploader_id': 'api',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
|
||||
webpage, 'partner id', group='id')
|
||||
kaltura_id = self._search_regex(
|
||||
r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
|
||||
webpage, 'videl id', group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
@@ -1,19 +1,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
month_by_name,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class NDTVIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ndtv\.com/video/(?:[^/]+/)+[^/?^&]+-(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
|
||||
'url': 'http://www.ndtv.com/video/news/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal-300710',
|
||||
'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
|
||||
'info_dict': {
|
||||
'id': '300710',
|
||||
@@ -22,7 +21,7 @@ class NDTVIE(InfoExtractor):
|
||||
'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
|
||||
'upload_date': '20131208',
|
||||
'duration': 1327,
|
||||
'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
},
|
||||
}
|
||||
|
||||
@@ -30,36 +29,19 @@ class NDTVIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - NDTV')
|
||||
|
||||
filename = self._search_regex(
|
||||
r"__filename='([^']+)'", webpage, 'video filename')
|
||||
video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
|
||||
filename)
|
||||
video_url = 'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
r"__duration='([^']+)'", webpage, 'duration', fatal=False))
|
||||
|
||||
date_m = re.search(r'''(?x)
|
||||
<p\s+class="vod_dateline">\s*
|
||||
Published\s+On:\s*
|
||||
(?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
|
||||
''', webpage)
|
||||
upload_date = None
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'publish-date', webpage, 'upload date', fatal=False))
|
||||
|
||||
if date_m is not None:
|
||||
month = month_by_name(date_m.group('monthname'))
|
||||
if month is not None:
|
||||
upload_date = '%s%02d%02d' % (
|
||||
date_m.group('year'), month, int(date_m.group('day')))
|
||||
|
||||
description = self._og_search_description(webpage)
|
||||
READ_MORE = ' (Read more)'
|
||||
if description.endswith(READ_MORE):
|
||||
description = description[:-len(READ_MORE)]
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
TITLE_SUFFIX = ' - NDTV'
|
||||
if title.endswith(TITLE_SUFFIX):
|
||||
title = title[:-len(TITLE_SUFFIX)]
|
||||
description = remove_end(self._og_search_description(webpage), ' (Read more)')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -2,8 +2,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
sanitized_Request,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
qualities,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -16,12 +20,12 @@ class NFBIE(InfoExtractor):
|
||||
'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
|
||||
'info_dict': {
|
||||
'id': 'qallunaat_why_white_people_are_funny',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Qallunaat! Why White People Are Funny ',
|
||||
'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
|
||||
'description': 'md5:6b8e32dde3abf91e58857b174916620c',
|
||||
'duration': 3128,
|
||||
'creator': 'Mark Sandiford',
|
||||
'uploader': 'Mark Sandiford',
|
||||
'uploader_id': 'mark-sandiford',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -31,65 +35,78 @@ class NFBIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(
|
||||
'https://www.nfb.ca/film/%s' % video_id, video_id,
|
||||
'Downloading film page')
|
||||
|
||||
uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
|
||||
page, 'director id', fatal=False)
|
||||
uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>',
|
||||
page, 'director name', fatal=False)
|
||||
|
||||
request = sanitized_Request(
|
||||
config = self._download_xml(
|
||||
'https://www.nfb.ca/film/%s/player_config' % video_id,
|
||||
urlencode_postdata({'getConfig': 'true'}))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')
|
||||
video_id, 'Downloading player config XML',
|
||||
data=urlencode_postdata({'getConfig': 'true'}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
|
||||
})
|
||||
|
||||
config = self._download_xml(request, video_id, 'Downloading player config XML')
|
||||
|
||||
title = None
|
||||
description = None
|
||||
thumbnail = None
|
||||
duration = None
|
||||
formats = []
|
||||
|
||||
def extract_thumbnail(media):
|
||||
thumbnails = {}
|
||||
for asset in media.findall('assets/asset'):
|
||||
thumbnails[asset.get('quality')] = asset.find('default/url').text
|
||||
if not thumbnails:
|
||||
return None
|
||||
if 'high' in thumbnails:
|
||||
return thumbnails['high']
|
||||
return list(thumbnails.values())[0]
|
||||
title, description, thumbnail, duration, uploader, author = [None] * 6
|
||||
thumbnails, formats = [[]] * 2
|
||||
subtitles = {}
|
||||
|
||||
for media in config.findall('./player/stream/media'):
|
||||
if media.get('type') == 'posterImage':
|
||||
thumbnail = extract_thumbnail(media)
|
||||
elif media.get('type') == 'video':
|
||||
duration = int(media.get('duration'))
|
||||
title = media.find('title').text
|
||||
description = media.find('description').text
|
||||
# It seems assets always go from lower to better quality, so no need to sort
|
||||
quality_key = qualities(('low', 'high'))
|
||||
thumbnails = []
|
||||
for asset in media.findall('assets/asset'):
|
||||
for x in asset:
|
||||
asset_url = xpath_text(asset, 'default/url', default=None)
|
||||
if not asset_url:
|
||||
continue
|
||||
quality = asset.get('quality')
|
||||
thumbnails.append({
|
||||
'url': asset_url,
|
||||
'id': quality,
|
||||
'preference': quality_key(quality),
|
||||
})
|
||||
elif media.get('type') == 'video':
|
||||
title = xpath_text(media, 'title', fatal=True)
|
||||
for asset in media.findall('assets/asset'):
|
||||
quality = asset.get('quality')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'^(\d+)[pP]$', quality or '', 'height', default=None))
|
||||
for node in asset:
|
||||
streamer = xpath_text(node, 'streamerURI', default=None)
|
||||
if not streamer:
|
||||
continue
|
||||
play_path = xpath_text(node, 'url', default=None)
|
||||
if not play_path:
|
||||
continue
|
||||
formats.append({
|
||||
'url': x.find('streamerURI').text,
|
||||
'app': x.find('streamerURI').text.split('/', 3)[3],
|
||||
'play_path': x.find('url').text,
|
||||
'url': streamer,
|
||||
'app': streamer.split('/', 3)[3],
|
||||
'play_path': play_path,
|
||||
'rtmp_live': False,
|
||||
'ext': 'mp4',
|
||||
'format_id': '%s-%s' % (x.tag, asset.get('quality')),
|
||||
'ext': 'flv',
|
||||
'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
description = clean_html(xpath_text(media, 'description'))
|
||||
uploader = xpath_text(media, 'author')
|
||||
duration = int_or_none(media.get('duration'))
|
||||
for subtitle in media.findall('./subtitles/subtitle'):
|
||||
subtitle_url = xpath_text(subtitle, 'url', default=None)
|
||||
if not subtitle_url:
|
||||
continue
|
||||
lang = xpath_text(subtitle, 'lang', default='en')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'creator': uploader,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -55,7 +55,9 @@ class NRKBaseIE(InfoExtractor):
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({'url': subtitle_url})
|
||||
subtitles.setdefault('no', []).append({
|
||||
'url': compat_urllib_parse_unquote(subtitle_url)
|
||||
})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': entry_title,
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
determine_ext,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
@@ -15,71 +16,80 @@ from ..compat import compat_urllib_parse_urlencode
|
||||
class OoyalaBaseIE(InfoExtractor):
|
||||
_PLAYER_BASE = 'http://player.ooyala.com/'
|
||||
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?'
|
||||
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
|
||||
|
||||
def _extract(self, content_tree_url, video_id, domain='example.org'):
|
||||
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
|
||||
metadata = content_tree[list(content_tree)[0]]
|
||||
embed_code = metadata['embed_code']
|
||||
pcode = metadata.get('asset_pcode') or embed_code
|
||||
video_info = {
|
||||
'id': embed_code,
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
|
||||
'duration': float_or_none(metadata.get('duration'), 1000),
|
||||
}
|
||||
title = metadata['title']
|
||||
|
||||
auth_data = self._download_json(
|
||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||
compat_urllib_parse_urlencode({
|
||||
'domain': domain,
|
||||
'supportedFormats': 'mp4,rtmp,m3u8,hds',
|
||||
}), video_id)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):
|
||||
auth_data = self._download_json(
|
||||
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
|
||||
compat_urllib_parse_urlencode({
|
||||
'domain': domain,
|
||||
'supportedFormats': supported_format
|
||||
}),
|
||||
video_id, 'Downloading %s JSON' % supported_format)
|
||||
|
||||
cur_auth_data = auth_data['authorization_data'][embed_code]
|
||||
|
||||
if cur_auth_data['authorized']:
|
||||
for stream in cur_auth_data['streams']:
|
||||
url = base64.b64decode(
|
||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
||||
if url in urls:
|
||||
continue
|
||||
urls.append(url)
|
||||
delivery_type = stream['delivery_type']
|
||||
if delivery_type == 'hls' or '.m3u8' in url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
url, embed_code, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif delivery_type == 'hds' or '.f4m' in url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif '.smil' in url:
|
||||
formats.extend(self._extract_smil_formats(
|
||||
url, embed_code, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': url,
|
||||
'ext': stream.get('delivery_type'),
|
||||
'vcodec': stream.get('video_codec'),
|
||||
'format_id': delivery_type,
|
||||
'width': int_or_none(stream.get('width')),
|
||||
'height': int_or_none(stream.get('height')),
|
||||
'abr': int_or_none(stream.get('audio_bitrate')),
|
||||
'vbr': int_or_none(stream.get('video_bitrate')),
|
||||
'fps': float_or_none(stream.get('framerate')),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, cur_auth_data['message']), expected=True)
|
||||
if cur_auth_data['authorized']:
|
||||
for stream in cur_auth_data['streams']:
|
||||
s_url = base64.b64decode(
|
||||
stream['url']['data'].encode('ascii')).decode('utf-8')
|
||||
if s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
ext = determine_ext(s_url, None)
|
||||
delivery_type = stream['delivery_type']
|
||||
if delivery_type == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
s_url, embed_code, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif delivery_type == 'hds' or ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
s_url, embed_code, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': s_url,
|
||||
'ext': ext or stream.get('delivery_type'),
|
||||
'vcodec': stream.get('video_codec'),
|
||||
'format_id': delivery_type,
|
||||
'width': int_or_none(stream.get('width')),
|
||||
'height': int_or_none(stream.get('height')),
|
||||
'abr': int_or_none(stream.get('audio_bitrate')),
|
||||
'vbr': int_or_none(stream.get('video_bitrate')),
|
||||
'fps': float_or_none(stream.get('framerate')),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, cur_auth_data['message']), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_info['formats'] = formats
|
||||
return video_info
|
||||
subtitles = {}
|
||||
for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles[lang] = [{
|
||||
'url': sub_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': embed_code,
|
||||
'title': title,
|
||||
'description': metadata.get('description'),
|
||||
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
|
||||
'duration': float_or_none(metadata.get('duration'), 1000),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class OoyalaIE(OoyalaBaseIE):
|
||||
|
@@ -2,7 +2,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PeriscopeIE(InfoExtractor):
|
||||
@@ -42,8 +45,11 @@ class PeriscopeIE(InfoExtractor):
|
||||
broadcast = broadcast_data['broadcast']
|
||||
status = broadcast['status']
|
||||
|
||||
uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name')
|
||||
uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')
|
||||
user = broadcast_data.get('user', {})
|
||||
|
||||
uploader = broadcast.get('user_display_name') or user.get('display_name')
|
||||
uploader_id = (broadcast.get('username') or user.get('username') or
|
||||
broadcast.get('user_id') or user.get('id'))
|
||||
|
||||
title = '%s - %s' % (uploader, status) if uploader else status
|
||||
state = broadcast.get('state').lower()
|
||||
@@ -92,6 +98,7 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'LularoeHusbandMike',
|
||||
'title': 'LULAROE HUSBAND MIKE',
|
||||
'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
|
||||
},
|
||||
# Periscope only shows videos in the last 24 hours, so it's possible to
|
||||
# get 0 videos
|
||||
@@ -103,16 +110,19 @@ class PeriscopeUserIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, user_id)
|
||||
|
||||
broadcast_data = self._parse_json(self._html_search_meta(
|
||||
'broadcast-data', webpage, default='{}'), user_id)
|
||||
username = broadcast_data.get('user', {}).get('display_name')
|
||||
user_broadcasts = self._parse_json(
|
||||
self._html_search_meta('user-broadcasts', webpage, default='{}'),
|
||||
data_store = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
r'data-store=(["\'])(?P<data>.+?)\1',
|
||||
webpage, 'data store', default='{}', group='data')),
|
||||
user_id)
|
||||
|
||||
user = data_store.get('User', {}).get('user', {})
|
||||
title = user.get('display_name') or user.get('username')
|
||||
description = user.get('description')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
|
||||
for broadcast in user_broadcasts.get('broadcasts', [])]
|
||||
for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
|
||||
|
||||
return self.playlist_result(entries, user_id, username)
|
||||
return self.playlist_result(entries, user_id, title, description)
|
||||
|
@@ -4,9 +4,8 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,6 +22,19 @@ class PlaywireIE(InfoExtractor):
|
||||
'duration': 145.94,
|
||||
},
|
||||
}, {
|
||||
# m3u8 in f4m
|
||||
'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
|
||||
'info_dict': {
|
||||
'id': '4840492',
|
||||
'ext': 'mp4',
|
||||
'title': 'ITV EL SHOW FULL',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Multiple resolutions while bitrates missing
|
||||
'url': 'http://cdn.playwire.com/11625/embed/85228.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -48,25 +60,10 @@ class PlaywireIE(InfoExtractor):
|
||||
thumbnail = content.get('poster')
|
||||
src = content['media']['f4m']
|
||||
|
||||
f4m = self._download_xml(src, video_id)
|
||||
base_url = xpath_text(f4m, './{http://ns.adobe.com/f4m/1.0}baseURL', 'base url', fatal=True)
|
||||
formats = []
|
||||
for media in f4m.findall('./{http://ns.adobe.com/f4m/1.0}media'):
|
||||
media_url = media.get('url')
|
||||
if not media_url:
|
||||
continue
|
||||
tbr = int_or_none(media.get('bitrate'))
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
f = {
|
||||
'url': '%s/%s' % (base_url, media.attrib['url']),
|
||||
'tbr': tbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
}
|
||||
if not (tbr or width or height):
|
||||
f['quality'] = 1 if '-hd.' in media_url else 0
|
||||
formats.append(f)
|
||||
formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
|
||||
for a_format in formats:
|
||||
if not dict_get(a_format, ['tbr', 'width', 'height']):
|
||||
a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
130
youtube_dl/extractor/radiocanada.py
Normal file
130
youtube_dl/extractor/radiocanada.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
find_xpath_attr,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
xpath_element,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class RadioCanadaIE(InfoExtractor):
|
||||
IE_NAME = 'radiocanada'
|
||||
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
|
||||
'info_dict': {
|
||||
'id': '7184272',
|
||||
'ext': 'flv',
|
||||
'title': 'Le parcours du tireur capté sur vidéo',
|
||||
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
|
||||
'upload_date': '20141023',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
app_code, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
formats = []
|
||||
# TODO: extract m3u8 and f4m formats
|
||||
# m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
|
||||
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
|
||||
for device_type in ('flash',):
|
||||
v_data = self._download_xml(
|
||||
'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
|
||||
video_id, note='Downloading %s XML' % device_type, query={
|
||||
'appCode': app_code,
|
||||
'idMedia': video_id,
|
||||
'connectionType': 'broadband',
|
||||
'multibitrate': 'true',
|
||||
'deviceType': device_type,
|
||||
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
|
||||
'paysJ391wsHjbOJwvCs26toz': 'CA',
|
||||
'bypasslock': 'NZt5K62gRqfc',
|
||||
})
|
||||
v_url = xpath_text(v_data, 'url')
|
||||
if not v_url:
|
||||
continue
|
||||
if v_url == 'null':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
|
||||
ext = determine_ext(v_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
ext = determine_ext(v_url)
|
||||
bitrates = xpath_element(v_data, 'bitrates')
|
||||
for url_e in bitrates.findall('url'):
|
||||
tbr = int_or_none(url_e.get('bitrate'))
|
||||
if not tbr:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': 'rtmp-%d' % tbr,
|
||||
'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
|
||||
'ext': 'flv',
|
||||
'protocol': 'rtmp',
|
||||
'width': int_or_none(url_e.get('width')),
|
||||
'height': int_or_none(url_e.get('height')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._download_xml(
|
||||
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
|
||||
video_id, note='Downloading metadata XML', query={
|
||||
'appCode': app_code,
|
||||
'idMedia': video_id,
|
||||
})
|
||||
|
||||
def get_meta(name):
|
||||
el = find_xpath_attr(metadata, './/Meta', 'name', name)
|
||||
return el.text if el is not None else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': get_meta('Title'),
|
||||
'description': get_meta('Description') or get_meta('ShortDescription'),
|
||||
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
|
||||
'duration': int_or_none(get_meta('length')),
|
||||
'series': get_meta('Emission'),
|
||||
'season_number': int_or_none('SrcSaison'),
|
||||
'episode_number': int_or_none('SrcEpisode'),
|
||||
'upload_date': unified_strdate(get_meta('Date')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class RadioCanadaAudioVideoIE(InfoExtractor):
|
||||
'radiocanada:audiovideo'
|
||||
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
|
||||
'info_dict': {
|
||||
'id': '7527184',
|
||||
'ext': 'flv',
|
||||
'title': 'Barack Obama au Vietnam',
|
||||
'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
|
||||
'upload_date': '20160523',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
|
69
youtube_dl/extractor/reuters.py
Normal file
69
youtube_dl/extractor/reuters.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class ReutersIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
|
||||
'md5': '8015113643a0b12838f160b0b81cc2ee',
|
||||
'info_dict': {
|
||||
'id': '368575562',
|
||||
'ext': 'mp4',
|
||||
'title': 'San Francisco police chief resigns',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
|
||||
video_data = js_to_json(self._search_regex(
|
||||
r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
|
||||
webpage, 'video data'))
|
||||
|
||||
def get_json_value(key, fatal=False):
|
||||
return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
|
||||
|
||||
title = unescapeHTML(get_json_value('title', fatal=True))
|
||||
mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
|
||||
|
||||
mas_data = self._download_json(
|
||||
'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
|
||||
video_id, transform_source=js_to_json)
|
||||
formats = []
|
||||
for f in mas_data:
|
||||
f_url = f.get('url')
|
||||
if not f_url:
|
||||
continue
|
||||
method = f.get('method')
|
||||
if method == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
container = f.get('container')
|
||||
ext = '3gp' if method == 'mobile' else container
|
||||
formats.append({
|
||||
'format_id': ext,
|
||||
'url': f_url,
|
||||
'ext': ext,
|
||||
'container': container if method != 'mobile' else None,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': get_json_value('thumb'),
|
||||
'duration': int_or_none(get_json_value('seconds')),
|
||||
'formats': formats,
|
||||
}
|
@@ -64,7 +64,7 @@ def _decrypt_url(png):
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
_VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||
@@ -87,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
|
86
youtube_dl/extractor/sendtonews.py
Normal file
86
youtube_dl/extractor/sendtonews.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .jwplatform import JWPlatformBaseIE
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class SendtoNewsIE(JWPlatformBaseIE):
|
||||
_VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'
|
||||
|
||||
_TEST = {
|
||||
# From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
|
||||
'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
|
||||
'info_dict': {
|
||||
'id': 'GxfCe0Zo7D-175909-5588',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recap: CLE 15, CIN 6',
|
||||
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
|
||||
'duration': 49,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
_URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
|
||||
(?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
|
||||
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
|
||||
\1>''', webpage)
|
||||
if mobj:
|
||||
sk, mk, pk = mobj.group('SC').split('-')
|
||||
return cls._URL_TEMPLATE % (sk, mk, pk)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
params = compat_parse_qs(mobj.group('query'))
|
||||
|
||||
if 'SK' not in params or 'MK' not in params or 'PK' not in params:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
jwplayer_data_str = self._search_regex(
|
||||
r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
|
||||
js_vars = {
|
||||
'w': 1024,
|
||||
'h': 768,
|
||||
'modeVar': 'html5',
|
||||
}
|
||||
for name, val in js_vars.items():
|
||||
js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
|
||||
jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)
|
||||
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
self._parse_json(jwplayer_data_str, video_id),
|
||||
video_id, require_title=False, rtmp_params={'no_resume': True})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
|
||||
description = self._html_search_regex(
|
||||
r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
|
||||
'description', fatal=False)
|
||||
duration = parse_duration(self._html_search_regex(
|
||||
r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
|
||||
'duration', fatal=False))
|
||||
|
||||
info_dict.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
})
|
||||
|
||||
return info_dict
|
@@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
|
||||
'md5': '3d6361864d7cac20b57c8784da17166f',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
@@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
class TF1IE(InfoExtractor):
|
||||
"""TF1 uses the wat.tv player."""
|
||||
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html'
|
||||
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
|
||||
'info_dict': {
|
||||
@@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
wat_id = self._html_search_regex(
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1',
|
||||
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1',
|
||||
webpage, 'wat id', group='id')
|
||||
return self.url_result('wat:%s' % wat_id, 'Wat')
|
||||
|
@@ -151,6 +151,22 @@ class ThePlatformIE(ThePlatformBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
m = re.search(
|
||||
r'''(?x)
|
||||
<meta\s+
|
||||
property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
|
||||
content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
|
||||
''', webpage)
|
||||
if m:
|
||||
return [m.group('url')]
|
||||
|
||||
matches = re.findall(
|
||||
r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||
if matches:
|
||||
return list(zip(*matches))[1]
|
||||
|
||||
@staticmethod
|
||||
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
|
||||
flags = '10' if include_qs else '00'
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
@@ -6,20 +6,13 @@ import re
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TvpIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl'
|
||||
_VALID_URL = r'https?://(?:vod|www)\.tvp\.pl/.*/(?P<id>\d+)$'
|
||||
class TVPIE(InfoExtractor):
|
||||
IE_NAME = 'tvp'
|
||||
IE_DESC = 'Telewizja Polska'
|
||||
_VALID_URL = r'https?://[^/]+\.tvp\.(?:pl|info)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem/wideo/odc-2/4278035',
|
||||
'md5': 'cdd98303338b8a7f7abab5cd14092bf2',
|
||||
'info_dict': {
|
||||
'id': '4278035',
|
||||
'ext': 'wmv',
|
||||
'title': 'Ogniem i mieczem, odc. 2',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/czas-honoru/sezon-1-1-13/i-seria-odc-13/194536',
|
||||
'url': 'http://vod.tvp.pl/194536/i-seria-odc-13',
|
||||
'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
|
||||
'info_dict': {
|
||||
'id': '194536',
|
||||
@@ -36,12 +29,22 @@ class TvpIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||
'md5': 'c3b15ed1af288131115ff17a17c19dda',
|
||||
'info_dict': {
|
||||
'id': '17834272',
|
||||
'ext': 'mp4',
|
||||
'title': 'Na sygnale, odc. 39',
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://wiadomosci.tvp.pl/25169746/24052016-1200',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://krakow.tvp.pl/25511623/25lecie-mck-wyjatkowe-miejsce-na-mapie-krakowa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://teleexpress.tvp.pl/25522307/wierni-wzieli-udzial-w-procesjach',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sport.tvp.pl/25522165/krychowiak-uspokaja-w-sprawie-kontuzji-dwa-tygodnie-to-maksimum',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tvp.info/25511919/trwa-rewolucja-wladza-zdecydowala-sie-na-pogwalcenie-konstytucji',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -92,8 +95,8 @@ class TvpIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TvpSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'tvp.pl:Series'
|
||||
class TVPSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'tvp:series'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -127,7 +130,7 @@ class TvpSeriesIE(InfoExtractor):
|
||||
videos_paths = re.findall(
|
||||
'(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
|
||||
entries = [
|
||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TvpIE.ie_key())
|
||||
self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
|
||||
for v_path in videos_paths]
|
||||
|
||||
return {
|
||||
|
@@ -47,7 +47,8 @@ class TwentyFourVideoIE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._html_search_regex(
|
||||
r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
|
||||
r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
|
||||
webpage, 'description', fatal=False, group='description')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, 'duration', fatal=False))
|
||||
|
@@ -142,7 +142,9 @@ class UdemyIE(InfoExtractor):
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(p in webpage for p in ['href="https://www.udemy.com/user/logout/', '>Logout<'])
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
|
||||
r'>Logout<'))
|
||||
|
||||
# already logged in
|
||||
if is_logged(login_popup):
|
||||
|
@@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
|
||||
'uploader': 'afp-news',
|
||||
'duration': 123,
|
||||
},
|
||||
'skip': 'This video has been deleted.',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
|
||||
|
@@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
|
||||
'md5': 'e9d77741f9e42ba583e683cd170660f7',
|
||||
'info_dict': {
|
||||
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
|
||||
'ext': 'flv',
|
||||
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
|
||||
'duration': 725.983,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.vice.com/video/how-to-hack-a-car',
|
||||
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
|
||||
@@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
|
||||
'uploader': 'Motherboard',
|
||||
'upload_date': '20140529',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
|
||||
'only_matching': True,
|
||||
|
@@ -217,7 +217,6 @@ class VKIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('videoid')
|
||||
|
||||
info_url = url
|
||||
if video_id:
|
||||
info_url = 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
|
||||
# Some videos (removed?) can only be downloaded with list id specified
|
||||
|
@@ -1,8 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import division, unicode_literals
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@@ -23,7 +22,7 @@ class VLiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "[V] Girl's Day's Broadcast",
|
||||
'title': "[V LIVE] Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
},
|
||||
@@ -35,24 +34,11 @@ class VLiveIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
'http://www.vlive.tv/video/%s' % video_id, video_id)
|
||||
|
||||
# UTC+x - UTC+9 (KST)
|
||||
tz = time.altzone if time.localtime().tm_isdst == 1 else time.timezone
|
||||
tz_offset = -tz // 60 - 9 * 60
|
||||
self._set_cookie('vlive.tv', 'timezoneOffset', '%d' % tz_offset)
|
||||
|
||||
status_params = self._download_json(
|
||||
'http://www.vlive.tv/video/status?videoSeq=%s' % video_id,
|
||||
video_id, 'Downloading JSON status',
|
||||
headers={'Referer': url.encode('utf-8')})
|
||||
status = status_params.get('status')
|
||||
air_start = status_params.get('onAirStartAt', '')
|
||||
is_live = status_params.get('isLive')
|
||||
|
||||
video_params = self._search_regex(
|
||||
r'vlive\.tv\.video\.ajax\.request\.handler\.init\((.+)\)',
|
||||
r'\bvlive\.video\.init\(([^)]+)\)',
|
||||
webpage, 'video params')
|
||||
live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[1:4]
|
||||
status, _, _, live_params, long_video_id, key = re.split(
|
||||
r'"\s*,\s*"', video_params)[2:8]
|
||||
|
||||
if status == 'LIVE_ON_AIR' or status == 'BIG_EVENT_ON_AIR':
|
||||
live_params = self._parse_json('"%s"' % live_params, video_id)
|
||||
@@ -61,8 +47,6 @@ class VLiveIE(InfoExtractor):
|
||||
elif status == 'VOD_ON_AIR' or status == 'BIG_EVENT_INTRO':
|
||||
if long_video_id and key:
|
||||
return self._replay(video_id, webpage, long_video_id, key)
|
||||
elif is_live:
|
||||
status = 'LIVE_END'
|
||||
else:
|
||||
status = 'COMING_SOON'
|
||||
|
||||
@@ -70,7 +54,7 @@ class VLiveIE(InfoExtractor):
|
||||
raise ExtractorError('Uploading for replay. Please wait...',
|
||||
expected=True)
|
||||
elif status == 'COMING_SOON':
|
||||
raise ExtractorError('Coming soon! %s' % air_start, expected=True)
|
||||
raise ExtractorError('Coming soon!', expected=True)
|
||||
elif status == 'CANCELED':
|
||||
raise ExtractorError('We are sorry, '
|
||||
'but the live broadcast has been canceled.',
|
||||
|
@@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Google\'s new material design direction',
|
||||
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# data-ooyala-id
|
||||
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
|
||||
@@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The Nexus 6: hands-on with Google\'s phablet',
|
||||
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# volume embed
|
||||
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
|
||||
@@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'The new frontier of LGBTQ civil rights, explained',
|
||||
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
|
||||
@@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
|
||||
'upload_date': '20160324',
|
||||
'uploader_id': 'voxdotcom',
|
||||
'uploader': 'Vox',
|
||||
}
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
|
||||
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
|
||||
@@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
|
||||
volume_webpage = self._download_webpage(
|
||||
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
|
||||
for provider_video_type in ('ooyala', 'youtube'):
|
||||
provider_video_id = video_data.get('%s_id' % provider_video_type)
|
||||
if provider_video_id:
|
||||
|
@@ -11,7 +11,96 @@ from ..utils import (
|
||||
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
|
||||
IE_NAME = 'washingtonpost'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TEST = {
|
||||
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
|
||||
'info_dict': {
|
||||
'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Egypt finds belongings, debris from plane crash',
|
||||
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
|
||||
'upload_date': '20160520',
|
||||
'uploader': 'Reuters',
|
||||
'timestamp': 1463778452,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
|
||||
video_id, transform_source=strip_jsonp)[0]['contentConfig']
|
||||
title = video_data['title']
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for s in video_data.get('streams', []):
|
||||
s_url = s.get('url')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
video_type = s.get('type')
|
||||
if video_type == 'smil':
|
||||
continue
|
||||
elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
width = m3u8_format.get('width')
|
||||
if not width:
|
||||
continue
|
||||
vbr = self._search_regex(
|
||||
r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
|
||||
if vbr:
|
||||
m3u8_format.update({
|
||||
'vbr': int_or_none(vbr),
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
width = int_or_none(s.get('width'))
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
has_width = width != 0
|
||||
formats.append({
|
||||
'format_id': (
|
||||
'%s-%d-%d' % (video_type, width, vbr)
|
||||
if width
|
||||
else video_type),
|
||||
'vbr': vbr if has_width else None,
|
||||
'width': width,
|
||||
'height': int_or_none(s.get('height')),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if has_width else 'none',
|
||||
'filesize': int_or_none(s.get('fileSize')),
|
||||
'url': s_url,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
|
||||
})
|
||||
source_media_url = video_data.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('blurb'),
|
||||
'uploader': video_data.get('credits', {}).get('source'),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('videoDuration'), 100),
|
||||
'timestamp': int_or_none(
|
||||
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
|
||||
}
|
||||
|
||||
|
||||
class WashingtonPostArticleIE(InfoExtractor):
|
||||
IE_NAME = 'washingtonpost:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
|
||||
'info_dict': {
|
||||
@@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
|
||||
}]
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
@@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
|
||||
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
|
||||
data-video-uuid=
|
||||
)"([^"]+)"''', webpage)
|
||||
entries = []
|
||||
for i, uuid in enumerate(uuids, start=1):
|
||||
vinfo_all = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
|
||||
page_id,
|
||||
transform_source=strip_jsonp,
|
||||
note='Downloading information of video %d/%d' % (i, len(uuids))
|
||||
)
|
||||
vinfo = vinfo_all[0]['contentConfig']
|
||||
uploader = vinfo.get('credits', {}).get('source')
|
||||
timestamp = int_or_none(
|
||||
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
|
||||
|
||||
formats = [{
|
||||
'format_id': (
|
||||
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
|
||||
if s.get('width')
|
||||
else s.get('type')),
|
||||
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
|
||||
'width': s.get('width'),
|
||||
'height': s.get('height'),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
|
||||
'filesize': s.get('fileSize'),
|
||||
'url': s.get('url'),
|
||||
'ext': 'mp4',
|
||||
'preference': -100 if s.get('type') == 'smil' else None,
|
||||
'protocol': {
|
||||
'MP4': 'http',
|
||||
'F4F': 'f4m',
|
||||
}.get(s.get('type')),
|
||||
} for s in vinfo.get('streams', [])]
|
||||
source_media_url = vinfo.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': uuid,
|
||||
'title': vinfo['title'],
|
||||
'description': vinfo.get('blurb'),
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(vinfo.get('videoDuration'), 100),
|
||||
'timestamp': timestamp,
|
||||
})
|
||||
entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
|
@@ -2,25 +2,26 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)'
|
||||
_VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
|
||||
IE_NAME = 'wat.tv'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
|
||||
'md5': 'ce70e9223945ed26a8056d413ca55dc9',
|
||||
'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
|
||||
'info_dict': {
|
||||
'id': '11713067',
|
||||
'display_id': 'soupe-figues-l-orange-aux-epices',
|
||||
'ext': 'mp4',
|
||||
'title': 'Soupe de figues à l\'orange et aux épices',
|
||||
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
|
||||
@@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
|
||||
'md5': 'fbc84e4378165278e743956d9c1bf16b',
|
||||
'info_dict': {
|
||||
'id': '11713075',
|
||||
'display_id': 'gregory-lemarchal-voix-ange',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
|
||||
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
|
||||
@@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def download_video_info(self, real_id):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||
|
||||
# 'contentv4' is used in the website, but it also returns the related
|
||||
# videos, we don't need them
|
||||
info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id)
|
||||
return info['media']
|
||||
|
||||
def _real_extract(self, url):
|
||||
def real_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
real_id = mobj.group('real_id')
|
||||
if not real_id:
|
||||
short_id = mobj.group('short_id')
|
||||
webpage = self._download_webpage(url, display_id or short_id)
|
||||
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
|
||||
|
||||
video_info = self.download_video_info(real_id)
|
||||
video_info = self._download_json(
|
||||
'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
|
||||
geo_list = video_info.get('geoList')
|
||||
country = geo_list[0] if geo_list else ''
|
||||
|
||||
chapters = video_info['chapters']
|
||||
first_chapter = chapters[0]
|
||||
files = video_info['files']
|
||||
first_file = files[0]
|
||||
|
||||
if real_id_for_chapter(first_chapter) != real_id:
|
||||
def video_id_for_chapter(chapter):
|
||||
return chapter['tc_start'].split('-')[0]
|
||||
|
||||
if video_id_for_chapter(first_chapter) != video_id:
|
||||
self.to_screen('Multipart video detected')
|
||||
chapter_urls = []
|
||||
for chapter in chapters:
|
||||
chapter_id = real_id_for_chapter(chapter)
|
||||
# Yes, when we this chapter is processed by WatIE,
|
||||
# it will download the info again
|
||||
chapter_info = self.download_video_info(chapter_id)
|
||||
chapter_urls.append(chapter_info['url'])
|
||||
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
|
||||
return self.playlist_result(entries, real_id, video_info['title'])
|
||||
|
||||
upload_date = None
|
||||
if 'date_diffusion' in first_chapter:
|
||||
upload_date = unified_strdate(first_chapter['date_diffusion'])
|
||||
entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
|
||||
return self.playlist_result(entries, video_id, video_info['title'])
|
||||
# Otherwise we can continue and extract just one part, we have to use
|
||||
# the short id for getting the video url
|
||||
# the video id for getting the video url
|
||||
|
||||
formats = [{
|
||||
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id,
|
||||
'format_id': 'Mobile',
|
||||
}]
|
||||
date_diffusion = first_chapter.get('date_diffusion')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
|
||||
fmts = [('SD', 'web')]
|
||||
if first_file.get('hasHD'):
|
||||
fmts.append(('HD', 'webhd'))
|
||||
def extract_url(path_template, url_type):
|
||||
req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
|
||||
head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
|
||||
red_url = head.geturl()
|
||||
if req_url == red_url:
|
||||
raise ExtractorError(
|
||||
'%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
|
||||
expected=True)
|
||||
return red_url
|
||||
|
||||
def compute_token(param):
|
||||
timestamp = '%08x' % int(self._download_webpage(
|
||||
'http://www.wat.tv/servertime', real_id,
|
||||
'Downloading server time').split('|')[0])
|
||||
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
|
||||
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
|
||||
m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
|
||||
http_url = extract_url('android5/%s.mp4', 'http')
|
||||
|
||||
for fmt in fmts:
|
||||
webid = '/%s/%s' % (fmt[1], real_id)
|
||||
video_url = self._download_webpage(
|
||||
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country),
|
||||
real_id,
|
||||
'Downloading %s video URL' % fmt[0],
|
||||
'Failed to download %s video URL' % fmt[0],
|
||||
False)
|
||||
if not video_url:
|
||||
formats = []
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
formats.extend(m3u8_formats)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
for m3u8_format in m3u8_formats:
|
||||
mobj = re.search(
|
||||
r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
|
||||
if not mobj:
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'format_id': fmt[0],
|
||||
abr, vbr = mobj.groups()
|
||||
abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
|
||||
m3u8_format.update({
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
if not vbr or not abr:
|
||||
continue
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': real_id,
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': first_chapter['title'],
|
||||
'thumbnail': first_chapter['preview'],
|
||||
'description': first_chapter['description'],
|
||||
'view_count': video_info['views'],
|
||||
'upload_date': upload_date,
|
||||
'duration': first_file['duration'],
|
||||
'duration': video_info['files'][0]['duration'],
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,16 +3,17 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
|
||||
_API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json'
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
|
||||
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
||||
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'md5': 'cafeb56ec0c53c18c97405eecb3133df',
|
||||
'info_dict': {
|
||||
@@ -24,36 +25,54 @@ class WistiaIE(InfoExtractor):
|
||||
'timestamp': 1386185018,
|
||||
'duration': 117,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
request = sanitized_Request(self._API_URL.format(video_id))
|
||||
request.add_header('Referer', url) # Some videos require this.
|
||||
data_json = self._download_json(request, video_id)
|
||||
data_json = self._download_json(
|
||||
self._API_URL % video_id, video_id,
|
||||
# Some videos require this.
|
||||
headers={
|
||||
'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
|
||||
})
|
||||
|
||||
if data_json.get('error'):
|
||||
raise ExtractorError('Error while getting the playlist',
|
||||
expected=True)
|
||||
raise ExtractorError(
|
||||
'Error while getting the playlist', expected=True)
|
||||
|
||||
data = data_json['media']
|
||||
title = data['name']
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for a in data['assets']:
|
||||
aurl = a.get('url')
|
||||
if not aurl:
|
||||
continue
|
||||
astatus = a.get('status')
|
||||
atype = a.get('type')
|
||||
if (astatus is not None and astatus != 2) or atype == 'preview':
|
||||
if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
|
||||
continue
|
||||
elif atype in ('still', 'still_image'):
|
||||
thumbnails.append({
|
||||
'url': a['url'],
|
||||
'resolution': '%dx%d' % (a['width'], a['height']),
|
||||
'url': aurl,
|
||||
'width': int_or_none(a.get('width')),
|
||||
'height': int_or_none(a.get('height')),
|
||||
})
|
||||
else:
|
||||
aext = a.get('ext')
|
||||
is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
|
||||
formats.append({
|
||||
'format_id': atype,
|
||||
'url': a['url'],
|
||||
'url': aurl,
|
||||
'tbr': int_or_none(a.get('bitrate')),
|
||||
'vbr': int_or_none(a.get('opt_vbitrate')),
|
||||
'width': int_or_none(a.get('width')),
|
||||
@@ -61,7 +80,8 @@ class WistiaIE(InfoExtractor):
|
||||
'filesize': int_or_none(a.get('size')),
|
||||
'vcodec': a.get('codec'),
|
||||
'container': a.get('container'),
|
||||
'ext': a.get('ext'),
|
||||
'ext': 'mp4' if is_m3u8 else aext,
|
||||
'protocol': 'm3u8' if is_m3u8 else None,
|
||||
'preference': 1 if atype == 'original' else None,
|
||||
})
|
||||
|
||||
@@ -73,6 +93,6 @@ class WistiaIE(InfoExtractor):
|
||||
'description': data.get('seoDescription'),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': int_or_none(data.get('duration')),
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
'timestamp': int_or_none(data.get('createdAt')),
|
||||
}
|
||||
|
@@ -12,37 +12,52 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'ext': 'mp4',
|
||||
'title': 'FemaleAgent Shy beauty takes the bait',
|
||||
'upload_date': '20121014',
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893.52,
|
||||
'age_limit': 18,
|
||||
}
|
||||
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
|
||||
'md5': '8281348b8d3c53d39fffb377d24eac4e',
|
||||
'info_dict': {
|
||||
'id': '1509445',
|
||||
'ext': 'mp4',
|
||||
'title': 'FemaleAgent Shy beauty takes the bait',
|
||||
'upload_date': '20121014',
|
||||
'uploader': 'Ruseful2011',
|
||||
'duration': 893.52,
|
||||
'age_limit': 18,
|
||||
},
|
||||
{
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'info_dict': {
|
||||
'id': '2221348',
|
||||
'ext': 'mp4',
|
||||
'title': 'Britney Spears Sexy Booty',
|
||||
'upload_date': '20130914',
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200.48,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
|
||||
'info_dict': {
|
||||
'id': '2221348',
|
||||
'ext': 'mp4',
|
||||
'title': 'Britney Spears Sexy Booty',
|
||||
'upload_date': '20130914',
|
||||
'uploader': 'jojo747400',
|
||||
'duration': 200.48,
|
||||
'age_limit': 18,
|
||||
},
|
||||
{
|
||||
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
|
||||
'only_matching': True,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
]
|
||||
}, {
|
||||
# empty seo
|
||||
'url': 'http://xhamster.com/movies/5667973/.html',
|
||||
'info_dict': {
|
||||
'id': '5667973',
|
||||
'ext': 'mp4',
|
||||
'title': '....',
|
||||
'upload_date': '20160208',
|
||||
'uploader': 'parejafree',
|
||||
'duration': 72.0,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
def extract_video_url(webpage, name):
|
||||
@@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id,
|
||||
r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
|
||||
webpage, 'xhamster url', default=None)
|
||||
|
||||
if not video_url:
|
||||
|
@@ -275,6 +275,8 @@ class YoukuIE(InfoExtractor):
|
||||
'format_id': self.get_format_name(fm),
|
||||
'ext': self.parse_ext_l(fm),
|
||||
'filesize': int(seg['size']),
|
||||
'width': stream.get('width'),
|
||||
'height': stream.get('height'),
|
||||
})
|
||||
|
||||
return {
|
||||
|
@@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener):
|
||||
|
||||
print_notes(to_screen, versions_info['versions'])
|
||||
|
||||
filename = sys.argv[0]
|
||||
# Py2EXE: Filename could be different
|
||||
if hasattr(sys, 'frozen') and not os.path.isfile(filename):
|
||||
if os.path.isfile(filename + '.exe'):
|
||||
filename += '.exe'
|
||||
# sys.executable is set to the full pathname of the exe-file for py2exe
|
||||
filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
|
||||
|
||||
if not os.access(filename, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % filename)
|
||||
@@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener):
|
||||
|
||||
# Py2EXE
|
||||
if hasattr(sys, 'frozen'):
|
||||
exe = os.path.abspath(filename)
|
||||
exe = filename
|
||||
directory = os.path.dirname(exe)
|
||||
if not os.access(directory, os.W_OK):
|
||||
to_screen('ERROR: no write permissions on %s' % directory)
|
||||
|
@@ -883,12 +883,17 @@ def make_socks_conn_class(base_class, socks_proxy):
|
||||
elif url_components.scheme.lower() == 'socks4a':
|
||||
socks_type = ProxyType.SOCKS4A
|
||||
|
||||
def unquote_if_non_empty(s):
|
||||
if not s:
|
||||
return s
|
||||
return compat_urllib_parse_unquote_plus(s)
|
||||
|
||||
proxy_args = (
|
||||
socks_type,
|
||||
url_components.hostname, url_components.port or 1080,
|
||||
True, # Remote DNS
|
||||
compat_urllib_parse_unquote_plus(url_components.username),
|
||||
compat_urllib_parse_unquote_plus(url_components.password),
|
||||
unquote_if_non_empty(url_components.username),
|
||||
unquote_if_non_empty(url_components.password),
|
||||
)
|
||||
|
||||
class SocksConnection(base_class):
|
||||
@@ -1030,6 +1035,7 @@ def unified_strdate(date_str, day_first=True):
|
||||
format_expressions.extend([
|
||||
'%d-%m-%Y',
|
||||
'%d.%m.%Y',
|
||||
'%d.%m.%y',
|
||||
'%d/%m/%Y',
|
||||
'%d/%m/%y',
|
||||
'%d/%m/%Y %H:%M:%S',
|
||||
@@ -1050,7 +1056,10 @@ def unified_strdate(date_str, day_first=True):
|
||||
if upload_date is None:
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
try:
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
except ValueError:
|
||||
pass
|
||||
if upload_date is not None:
|
||||
return compat_str(upload_date)
|
||||
|
||||
@@ -1544,15 +1553,11 @@ def setproctitle(title):
|
||||
|
||||
|
||||
def remove_start(s, start):
|
||||
if s.startswith(start):
|
||||
return s[len(start):]
|
||||
return s
|
||||
return s[len(start):] if s is not None and s.startswith(start) else s
|
||||
|
||||
|
||||
def remove_end(s, end):
|
||||
if s.endswith(end):
|
||||
return s[:-len(end)]
|
||||
return s
|
||||
return s[:-len(end)] if s is not None and s.endswith(end) else s
|
||||
|
||||
|
||||
def remove_quotes(s):
|
||||
@@ -1906,7 +1911,7 @@ def parse_age_limit(s):
|
||||
|
||||
def strip_jsonp(code):
|
||||
return re.sub(
|
||||
r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
|
||||
|
||||
|
||||
def js_to_json(code):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2016.05.16'
|
||||
__version__ = '2016.05.30'
|
||||
|
Reference in New Issue
Block a user