mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-23 16:48:37 +09:00
Compare commits
75 Commits
8a3797a4ab
...
2021.01.24
Author | SHA1 | Date | |
---|---|---|---|
![]() |
b63981e850 | ||
![]() |
186cbaffb9 | ||
![]() |
dbf3fa8af6 | ||
![]() |
f08c31cf33 | ||
![]() |
d8dab85419 | ||
![]() |
5519bba3e1 | ||
![]() |
142c584063 | ||
![]() |
4542e3e555 | ||
![]() |
fa8f6d8580 | ||
![]() |
3bb7769c40 | ||
![]() |
8d286bd5b6 | ||
![]() |
cff72b4cc0 | ||
![]() |
657221c81d | ||
![]() |
62acf5fa2c | ||
![]() |
b79977fb6b | ||
![]() |
bc7c8f3d4e | ||
![]() |
015e19b350 | ||
![]() |
54856480d7 | ||
![]() |
1dd12708c2 | ||
![]() |
f9201cef58 | ||
![]() |
26499ba823 | ||
![]() |
58f6c2112d | ||
![]() |
de026a6acd | ||
![]() |
d4564afc70 | ||
![]() |
360a5e0f60 | ||
![]() |
55a3ca16d3 | ||
![]() |
ef50cb3fda | ||
![]() |
8673f4344c | ||
![]() |
f1487d4fca | ||
![]() |
0cd4c402f0 | ||
![]() |
9c9b458145 | ||
![]() |
9d50f86232 | ||
![]() |
7e92f9015e | ||
![]() |
aa860b8016 | ||
![]() |
b484097b01 | ||
![]() |
ab9001dab5 | ||
![]() |
879866a230 | ||
![]() |
8e5477d036 | ||
![]() |
1e8e5d5238 | ||
![]() |
d81a213cfb | ||
![]() |
7c2d18a13f | ||
![]() |
2408e6d26a | ||
![]() |
cf862771d7 | ||
![]() |
a938f111ed | ||
![]() |
4759543f6e | ||
![]() |
d0fc289f45 | ||
![]() |
70f572585d | ||
![]() |
c2d06aef60 | ||
![]() |
ff1e765400 | ||
![]() |
170e1c1995 | ||
![]() |
61e669acff | ||
![]() |
2c337f4e85 | ||
![]() |
bf6a74c620 | ||
![]() |
38a967c98e | ||
![]() |
3a61e6d360 | ||
![]() |
3d8e32dcc0 | ||
![]() |
8f29b2dd38 | ||
![]() |
a29e340efa | ||
![]() |
b13f29098f | ||
![]() |
430c4bc9d0 | ||
![]() |
4ae243fc6c | ||
![]() |
8f20ad36dc | ||
![]() |
799c794947 | ||
![]() |
1ae7ae0b96 | ||
![]() |
ccc7112291 | ||
![]() |
5b24f8f505 | ||
![]() |
fcd90d2583 | ||
![]() |
8f757c7353 | ||
![]() |
be1a3f2d11 | ||
![]() |
ecae54a98d | ||
![]() |
f318882955 | ||
![]() |
c3399cac19 | ||
![]() |
9237aaa77f | ||
![]() |
766fcdd0fa | ||
![]() |
f6ea29e24b |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24.1**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.01.03
|
||||
[debug] youtube-dl version 2021.01.24.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24.1**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24.1**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24.1**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.01.03
|
||||
[debug] youtube-dl version 2021.01.24.1
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24.1. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24.1**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
30
.github/workflows/ci.yml
vendored
30
.github/workflows/ci.yml
vendored
@@ -1,5 +1,5 @@
|
||||
name: CI
|
||||
on: [push]
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
tests:
|
||||
name: Tests
|
||||
@@ -7,31 +7,55 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
os: [ubuntu-18.04]
|
||||
# TODO: python 2.6
|
||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
||||
python-impl: [cpython]
|
||||
ytdl-test-set: [core, download]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# python 3.2 is only available on windows via setup-python
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: bat
|
||||
# jython
|
||||
- os: ubuntu-18.04
|
||||
python-impl: jython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: sh
|
||||
- os: ubuntu-18.04
|
||||
python-impl: jython
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: sh
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
if: ${{ matrix.python-impl == 'cpython' }}
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Set up Java 8
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 8
|
||||
- name: Install Jython
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
run: |
|
||||
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Run tests
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' }}
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||
env:
|
||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
|
50
.travis.yml
50
.travis.yml
@@ -1,50 +0,0 @@
|
||||
language: python
|
||||
python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
# - YTDL_TEST_SET=download
|
||||
jobs:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
# - python: 3.7
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- python: 3.8
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
# - python: 3.8
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
# - python: 3.8-dev
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||
- name: flake8
|
||||
python: 3.8
|
||||
dist: xenial
|
||||
install: pip install flake8
|
||||
script: flake8 .
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
# - env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
script: ./devscripts/run_tests.sh
|
90
ChangeLog
90
ChangeLog
@@ -1,3 +1,93 @@
|
||||
version 2021.01.24
|
||||
|
||||
Core
|
||||
* Introduce --output-na-placeholder (#27896)
|
||||
|
||||
Extractors
|
||||
* [franceculture] Make thumbnail optional (#18807)
|
||||
* [franceculture] Fix extraction (#27891, #27903)
|
||||
* [njpwworld] Fix extraction (#27890)
|
||||
* [comedycentral] Fix extraction (#27905)
|
||||
* [wat] Fix format extraction (#27901)
|
||||
+ [americastestkitchen:season] Add support for seasons (#27861)
|
||||
+ [trovo] Add support for trovo.live (#26125)
|
||||
+ [aol] Add support for yahoo videos (#26650)
|
||||
* [yahoo] Fix single video extraction
|
||||
* [lbry] Unescape lbry URI (#27872)
|
||||
* [9gag] Fix and improve extraction (#23022)
|
||||
* [americastestkitchen] Improve metadata extraction for ATK episodes (#27860)
|
||||
* [aljazeera] Fix extraction (#20911, #27779)
|
||||
+ [minds] Add support for minds.com (#17934)
|
||||
* [ard] Fix title and description extraction (#27761)
|
||||
+ [spotify] Add support for Spotify Podcasts (#27443)
|
||||
|
||||
|
||||
version 2021.01.16
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Protect from infinite recursion due to recursively nested
|
||||
playlists (#27833)
|
||||
* [YoutubeDL] Ignore failure to create existing directory (#27811)
|
||||
* [YoutubeDL] Raise syntax error for format selection expressions with multiple
|
||||
+ operators (#27803)
|
||||
|
||||
Extractors
|
||||
+ [animeondemand] Add support for lazy playlist extraction (#27829)
|
||||
* [youporn] Restrict fallback download URL (#27822)
|
||||
* [youporn] Improve height and tbr extraction (#20425, #23659)
|
||||
* [youporn] Fix extraction (#27822)
|
||||
+ [twitter] Add support for unified cards (#27826)
|
||||
+ [twitch] Add Authorization header with OAuth token for GraphQL requests
|
||||
(#27790)
|
||||
* [mixcloud:playlist:base] Extract video id in flat playlist mode (#27787)
|
||||
* [cspan] Improve info extraction (#27791)
|
||||
* [adn] Improve info extraction
|
||||
* [adn] Fix extraction (#26963, #27732)
|
||||
* [youtube:search] Extract from all sections (#27604)
|
||||
* [youtube:search] fix viewcount and try to extract all video sections (#27604)
|
||||
* [twitch] Improve login error extraction
|
||||
* [twitch] Fix authentication (#27743)
|
||||
* [3qsdn] Improve extraction (#21058)
|
||||
* [peertube] Extract formats from streamingPlaylists (#26002, #27586, #27728)
|
||||
* [khanacademy] Fix extraction (#2887, #26803)
|
||||
* [spike] Update Paramount Network feed URL (#27715)
|
||||
|
||||
|
||||
version 2021.01.08
|
||||
|
||||
Core
|
||||
* [downloader/hls] Disable decryption in tests (#27660)
|
||||
+ [utils] Add a function to clean podcast URLs
|
||||
|
||||
Extractors
|
||||
* [rai] Improve subtitles extraction (#27698, #27705)
|
||||
* [canvas] Match only supported VRT NU URLs (#27707)
|
||||
+ [bibeltv] Add support for bibeltv.de (#14361)
|
||||
+ [bfmtv] Add support for bfmtv.com (#16053, #26615)
|
||||
+ [sbs] Add support for ondemand play and news embed URLs (#17650, #27629)
|
||||
* [twitch] Drop legacy kraken API v5 code altogether and refactor
|
||||
* [twitch:vod] Switch to GraphQL for video metadata
|
||||
* [canvas] Fix VRT NU extraction (#26957, #27053)
|
||||
* [twitch] Switch access token to GraphQL and refactor (#27646)
|
||||
+ [rai] Detect ContentItem in iframe (#12652, #27673)
|
||||
* [ketnet] Fix extraction (#27662)
|
||||
+ [dplay] Add suport Discovery+ domains (#27680)
|
||||
* [motherless] Improve extraction (#26495, #27450)
|
||||
* [motherless] Fix recent videos upload date extraction (#27661)
|
||||
* [nrk] Fix extraction for videos without a legalAge rating
|
||||
- [googleplus] Remove extractor (#4955, #7400)
|
||||
+ [applepodcasts] Add support for podcasts.apple.com (#25918)
|
||||
+ [googlepodcasts] Add support for podcasts.google.com
|
||||
+ [iheart] Add support for iheart.com (#27037)
|
||||
* [acast] Clean podcast URLs
|
||||
* [stitcher] Clean podcast URLs
|
||||
+ [xfileshare] Add support for aparat.cam (#27651)
|
||||
+ [twitter] Add support for summary card (#25121)
|
||||
* [twitter] Try to use a Generic fallback for unknown twitter cards (#25982)
|
||||
+ [stitcher] Add support for shows and show metadata extraction (#20510)
|
||||
* [stv] Improve episode id extraction (#23083)
|
||||
|
||||
|
||||
version 2021.01.03
|
||||
|
||||
Extractors
|
||||
|
763
README.md
763
README.md
@@ -52,394 +52,431 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
# OPTIONS
|
||||
-h, --help Print this help text and exit
|
||||
--version Print program version and exit
|
||||
-U, --update Update this program to latest version. Make
|
||||
sure that you have sufficient permissions
|
||||
(run with sudo if needed)
|
||||
-i, --ignore-errors Continue on download errors, for example to
|
||||
skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the
|
||||
playlist or the command line) if an error
|
||||
occurs
|
||||
--dump-user-agent Display the current browser identification
|
||||
--list-extractors List all supported extractors
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors
|
||||
--force-generic-extractor Force extraction to use the generic
|
||||
extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For
|
||||
example "gvsearch2:" downloads two videos
|
||||
from google videos for youtube-dl "large
|
||||
apple". Use the value "auto" to let
|
||||
youtube-dl guess ("auto_warning" to emit a
|
||||
warning when guessing). "error" just throws
|
||||
an error. The default value "fixup_error"
|
||||
repairs broken URLs, but emits an error if
|
||||
this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given
|
||||
in the global configuration file
|
||||
/etc/youtube-dl.conf: Do not read the user
|
||||
configuration in ~/.config/youtube-
|
||||
dl/config (%APPDATA%/youtube-dl/config.txt
|
||||
on Windows)
|
||||
--config-location PATH Location of the configuration file; either
|
||||
the path to the config or its containing
|
||||
directory.
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube only)
|
||||
--no-color Do not emit color codes in output
|
||||
-h, --help Print this help text and exit
|
||||
--version Print program version and exit
|
||||
-U, --update Update this program to latest version.
|
||||
Make sure that you have sufficient
|
||||
permissions (run with sudo if needed)
|
||||
-i, --ignore-errors Continue on download errors, for
|
||||
example to skip unavailable videos in a
|
||||
playlist
|
||||
--abort-on-error Abort downloading of further videos (in
|
||||
the playlist or the command line) if an
|
||||
error occurs
|
||||
--dump-user-agent Display the current browser
|
||||
identification
|
||||
--list-extractors List all supported extractors
|
||||
--extractor-descriptions Output descriptions of all supported
|
||||
extractors
|
||||
--force-generic-extractor Force extraction to use the generic
|
||||
extractor
|
||||
--default-search PREFIX Use this prefix for unqualified URLs.
|
||||
For example "gvsearch2:" downloads two
|
||||
videos from google videos for youtube-
|
||||
dl "large apple". Use the value "auto"
|
||||
to let youtube-dl guess ("auto_warning"
|
||||
to emit a warning when guessing).
|
||||
"error" just throws an error. The
|
||||
default value "fixup_error" repairs
|
||||
broken URLs, but emits an error if this
|
||||
is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When
|
||||
given in the global configuration file
|
||||
/etc/youtube-dl.conf: Do not read the
|
||||
user configuration in
|
||||
~/.config/youtube-dl/config
|
||||
(%APPDATA%/youtube-dl/config.txt on
|
||||
Windows)
|
||||
--config-location PATH Location of the configuration file;
|
||||
either the path to the config or its
|
||||
containing directory.
|
||||
--flat-playlist Do not extract the videos of a
|
||||
playlist, only list them.
|
||||
--mark-watched Mark videos watched (YouTube only)
|
||||
--no-mark-watched Do not mark videos watched (YouTube
|
||||
only)
|
||||
--no-color Do not emit color codes in output
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
To enable SOCKS proxy, specify a proper
|
||||
scheme. For example
|
||||
socks5://127.0.0.1:1080/. Pass in an empty
|
||||
string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS
|
||||
proxy. To enable SOCKS proxy, specify a
|
||||
proper scheme. For example
|
||||
socks5://127.0.0.1:1080/. Pass in an
|
||||
empty string (--proxy "") for direct
|
||||
connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in
|
||||
seconds
|
||||
--source-address IP Client-side IP address to bind to
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
|
||||
## Geo Restriction:
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
option is not present) is used for the
|
||||
actual downloading.
|
||||
--geo-bypass Bypass geographic restriction via faking
|
||||
X-Forwarded-For HTTP header
|
||||
--no-geo-bypass Do not bypass geographic restriction via
|
||||
faking X-Forwarded-For HTTP header
|
||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||
explicitly provided two-letter ISO 3166-2
|
||||
country code
|
||||
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
|
||||
explicitly provided IP block in CIDR
|
||||
notation
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address
|
||||
for some geo-restricted sites. The
|
||||
default proxy specified by --proxy (or
|
||||
none, if the option is not present) is
|
||||
used for the actual downloading.
|
||||
--geo-bypass Bypass geographic restriction via
|
||||
faking X-Forwarded-For HTTP header
|
||||
--no-geo-bypass Do not bypass geographic restriction
|
||||
via faking X-Forwarded-For HTTP header
|
||||
--geo-bypass-country CODE Force bypass geographic restriction
|
||||
with explicitly provided two-letter ISO
|
||||
3166-2 country code
|
||||
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction
|
||||
with explicitly provided IP block in
|
||||
CIDR notation
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER Playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify
|
||||
indices of the videos in the playlist
|
||||
separated by commas like: "--playlist-items
|
||||
1,2,5,8" if you want to download videos
|
||||
indexed 1, 2, 5, 8 in the playlist. You can
|
||||
specify range: "--playlist-items
|
||||
1-3,7,10-13", it will download the videos
|
||||
at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX Download only matching titles (regex or
|
||||
caseless sub-string)
|
||||
--reject-title REGEX Skip download for matching titles (regex or
|
||||
caseless sub-string)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--min-filesize SIZE Do not download any videos smaller than
|
||||
SIZE (e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE
|
||||
(e.g. 50k or 44.6m)
|
||||
--date DATE Download only videos uploaded in this date
|
||||
--datebefore DATE Download only videos uploaded on or before
|
||||
this date (i.e. inclusive)
|
||||
--dateafter DATE Download only videos uploaded on or after
|
||||
this date (i.e. inclusive)
|
||||
--min-views COUNT Do not download any videos with less than
|
||||
COUNT views
|
||||
--max-views COUNT Do not download any videos with more than
|
||||
COUNT views
|
||||
--match-filter FILTER Generic video filter. Specify any key (see
|
||||
the "OUTPUT TEMPLATE" for a list of
|
||||
available keys) to match if the key is
|
||||
present, !key to check if the key is not
|
||||
present, key > NUMBER (like "comment_count
|
||||
> 12", also works with >=, <, <=, !=, =) to
|
||||
compare against a number, key = 'LITERAL'
|
||||
(like "uploader = 'Mike Smith'", also works
|
||||
with !=) to match against a string literal
|
||||
and & to require multiple matches. Values
|
||||
which are not known are excluded unless you
|
||||
put a question mark (?) after the operator.
|
||||
For example, to only match videos that have
|
||||
been liked more than 100 times and disliked
|
||||
less than 50 times (or the dislike
|
||||
functionality is not available at the given
|
||||
service), but who also have a description,
|
||||
use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description" .
|
||||
--no-playlist Download only the video, if the URL refers
|
||||
to a video and a playlist.
|
||||
--yes-playlist Download the playlist, if the URL refers to
|
||||
a video and a playlist.
|
||||
--age-limit YEARS Download only videos suitable for the given
|
||||
age
|
||||
--download-archive FILE Download only videos not listed in the
|
||||
archive file. Record the IDs of all
|
||||
downloaded videos in it.
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
--playlist-start NUMBER Playlist video to start at (default is
|
||||
1)
|
||||
--playlist-end NUMBER Playlist video to end at (default is
|
||||
last)
|
||||
--playlist-items ITEM_SPEC Playlist video items to download.
|
||||
Specify indices of the videos in the
|
||||
playlist separated by commas like: "--
|
||||
playlist-items 1,2,5,8" if you want to
|
||||
download videos indexed 1, 2, 5, 8 in
|
||||
the playlist. You can specify range: "
|
||||
--playlist-items 1-3,7,10-13", it will
|
||||
download the videos at index 1, 2, 3,
|
||||
7, 10, 11, 12 and 13.
|
||||
--match-title REGEX Download only matching titles (regex or
|
||||
caseless sub-string)
|
||||
--reject-title REGEX Skip download for matching titles
|
||||
(regex or caseless sub-string)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--min-filesize SIZE Do not download any videos smaller than
|
||||
SIZE (e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than
|
||||
SIZE (e.g. 50k or 44.6m)
|
||||
--date DATE Download only videos uploaded in this
|
||||
date
|
||||
--datebefore DATE Download only videos uploaded on or
|
||||
before this date (i.e. inclusive)
|
||||
--dateafter DATE Download only videos uploaded on or
|
||||
after this date (i.e. inclusive)
|
||||
--min-views COUNT Do not download any videos with less
|
||||
than COUNT views
|
||||
--max-views COUNT Do not download any videos with more
|
||||
than COUNT views
|
||||
--match-filter FILTER Generic video filter. Specify any key
|
||||
(see the "OUTPUT TEMPLATE" for a list
|
||||
of available keys) to match if the key
|
||||
is present, !key to check if the key is
|
||||
not present, key > NUMBER (like
|
||||
"comment_count > 12", also works with
|
||||
>=, <, <=, !=, =) to compare against a
|
||||
number, key = 'LITERAL' (like "uploader
|
||||
= 'Mike Smith'", also works with !=) to
|
||||
match against a string literal and & to
|
||||
require multiple matches. Values which
|
||||
are not known are excluded unless you
|
||||
put a question mark (?) after the
|
||||
operator. For example, to only match
|
||||
videos that have been liked more than
|
||||
100 times and disliked less than 50
|
||||
times (or the dislike functionality is
|
||||
not available at the given service),
|
||||
but who also have a description, use
|
||||
--match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description" .
|
||||
--no-playlist Download only the video, if the URL
|
||||
refers to a video and a playlist.
|
||||
--yes-playlist Download the playlist, if the URL
|
||||
refers to a video and a playlist.
|
||||
--age-limit YEARS Download only videos suitable for the
|
||||
given age
|
||||
--download-archive FILE Download only videos not listed in the
|
||||
archive file. Record the IDs of all
|
||||
downloaded videos in it.
|
||||
--include-ads Download advertisements as well
|
||||
(experimental)
|
||||
|
||||
## Download Options:
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per second
|
||||
(e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
--fragment-retries RETRIES Number of retries for a fragment (default
|
||||
is 10), or "infinite" (DASH, hlsnative and
|
||||
ISM)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative
|
||||
and ISM)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--keep-fragments Keep downloaded fragments on disk after
|
||||
downloading is finished; fragments are
|
||||
erased by default
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
size. By default, the buffer size is
|
||||
automatically resized from an initial value
|
||||
of SIZE.
|
||||
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
|
||||
downloading (e.g. 10485760 or 10M) (default
|
||||
is disabled). May be useful for bypassing
|
||||
bandwidth throttling imposed by a webserver
|
||||
(experimental)
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
downloader
|
||||
--hls-use-mpegts Use the mpegts container for HLS videos,
|
||||
allowing to play the video while
|
||||
downloading (some players may not be able
|
||||
to play it)
|
||||
--external-downloader COMMAND Use the specified external downloader.
|
||||
Currently supports
|
||||
aria2c,avconv,axel,curl,ffmpeg,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external
|
||||
downloader
|
||||
-r, --limit-rate RATE Maximum download rate in bytes per
|
||||
second (e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or
|
||||
"infinite".
|
||||
--fragment-retries RETRIES Number of retries for a fragment
|
||||
(default is 10), or "infinite" (DASH,
|
||||
hlsnative and ISM)
|
||||
--skip-unavailable-fragments Skip unavailable fragments (DASH,
|
||||
hlsnative and ISM)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is
|
||||
not available
|
||||
--keep-fragments Keep downloaded fragments on disk after
|
||||
downloading is finished; fragments are
|
||||
erased by default
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or
|
||||
16K) (default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
size. By default, the buffer size is
|
||||
automatically resized from an initial
|
||||
value of SIZE.
|
||||
--http-chunk-size SIZE Size of a chunk for chunk-based HTTP
|
||||
downloading (e.g. 10485760 or 10M)
|
||||
(default is disabled). May be useful
|
||||
for bypassing bandwidth throttling
|
||||
imposed by a webserver (experimental)
|
||||
--playlist-reverse Download playlist videos in reverse
|
||||
order
|
||||
--playlist-random Download playlist videos in random
|
||||
order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size
|
||||
--hls-prefer-native Use the native HLS downloader instead
|
||||
of ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
downloader
|
||||
--hls-use-mpegts Use the mpegts container for HLS
|
||||
videos, allowing to play the video
|
||||
while downloading (some players may not
|
||||
be able to play it)
|
||||
--external-downloader COMMAND Use the specified external downloader.
|
||||
Currently supports aria2c,avconv,axel,c
|
||||
url,ffmpeg,httpie,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external
|
||||
downloader
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE File containing URLs to download ('-' for
|
||||
stdin), one URL per line. Lines starting
|
||||
with '#', ';' or ']' are considered as
|
||||
comments and ignored.
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT
|
||||
TEMPLATE" for all the info
|
||||
--autonumber-start NUMBER Specify the start value for %(autonumber)s
|
||||
(default is 1)
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-w, --no-overwrites Do not overwrite files
|
||||
-c, --continue Force resume of partially downloaded files.
|
||||
By default, youtube-dl will resume
|
||||
downloads if possible.
|
||||
--no-continue Do not resume partially downloaded files
|
||||
(restart from beginning)
|
||||
--no-part Do not use .part files - write directly
|
||||
into output file
|
||||
--no-mtime Do not use the Last-modified header to set
|
||||
the file modification time
|
||||
--write-description Write video description to a .description
|
||||
file
|
||||
--write-info-json Write video metadata to a .info.json file
|
||||
--write-annotations Write video annotations to a
|
||||
.annotations.xml file
|
||||
--load-info-json FILE JSON file containing the video information
|
||||
(created with the "--write-info-json"
|
||||
option)
|
||||
--cookies FILE File to read cookies from and dump cookie
|
||||
jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl
|
||||
can store some downloaded information
|
||||
permanently. By default
|
||||
$XDG_CACHE_HOME/youtube-dl or
|
||||
~/.cache/youtube-dl . At the moment, only
|
||||
YouTube player files (for videos with
|
||||
obfuscated signatures) are cached, but that
|
||||
may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
-a, --batch-file FILE File containing URLs to download ('-'
|
||||
for stdin), one URL per line. Lines
|
||||
starting with '#', ';' or ']' are
|
||||
considered as comments and ignored.
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template, see the
|
||||
"OUTPUT TEMPLATE" for all the info
|
||||
--output-na-placeholder PLACEHOLDER Placeholder value for unavailable meta
|
||||
fields in output filename template
|
||||
(default is "NA")
|
||||
--autonumber-start NUMBER Specify the start value for
|
||||
%(autonumber)s (default is 1)
|
||||
--restrict-filenames Restrict filenames to only ASCII
|
||||
characters, and avoid "&" and spaces in
|
||||
filenames
|
||||
-w, --no-overwrites Do not overwrite files
|
||||
-c, --continue Force resume of partially downloaded
|
||||
files. By default, youtube-dl will
|
||||
resume downloads if possible.
|
||||
--no-continue Do not resume partially downloaded
|
||||
files (restart from beginning)
|
||||
--no-part Do not use .part files - write directly
|
||||
into output file
|
||||
--no-mtime Do not use the Last-modified header to
|
||||
set the file modification time
|
||||
--write-description Write video description to a
|
||||
.description file
|
||||
--write-info-json Write video metadata to a .info.json
|
||||
file
|
||||
--write-annotations Write video annotations to a
|
||||
.annotations.xml file
|
||||
--load-info-json FILE JSON file containing the video
|
||||
information (created with the "--write-
|
||||
info-json" option)
|
||||
--cookies FILE File to read cookies from and dump
|
||||
cookie jar in
|
||||
--cache-dir DIR Location in the filesystem where
|
||||
youtube-dl can store some downloaded
|
||||
information permanently. By default
|
||||
$XDG_CACHE_HOME/youtube-dl or
|
||||
~/.cache/youtube-dl . At the moment,
|
||||
only YouTube player files (for videos
|
||||
with obfuscated signatures) are cached,
|
||||
but that may change.
|
||||
--no-cache-dir Disable filesystem caching
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail images:
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--write-all-thumbnails Write all thumbnail image formats to disk
|
||||
--list-thumbnails Simulate and list all available thumbnail
|
||||
formats
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--write-all-thumbnails Write all thumbnail image formats to
|
||||
disk
|
||||
--list-thumbnails Simulate and list all available
|
||||
thumbnail formats
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet Activate quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not write
|
||||
anything to disk
|
||||
--skip-download Do not download the video
|
||||
-g, --get-url Simulate, quiet but print URL
|
||||
-e, --get-title Simulate, quiet but print title
|
||||
--get-id Simulate, quiet but print id
|
||||
--get-thumbnail Simulate, quiet but print thumbnail URL
|
||||
--get-description Simulate, quiet but print video description
|
||||
--get-duration Simulate, quiet but print video length
|
||||
--get-filename Simulate, quiet but print output filename
|
||||
--get-format Simulate, quiet but print output format
|
||||
-j, --dump-json Simulate, quiet but print JSON information.
|
||||
See the "OUTPUT TEMPLATE" for a description
|
||||
of available keys.
|
||||
-J, --dump-single-json Simulate, quiet but print JSON information
|
||||
for each command-line argument. If the URL
|
||||
refers to a playlist, dump the whole
|
||||
playlist information in a single line.
|
||||
--print-json Be quiet and print the video information as
|
||||
JSON (video is still being downloaded).
|
||||
--newline Output progress bar as new lines
|
||||
--no-progress Do not print progress bar
|
||||
--console-title Display progress in console titlebar
|
||||
-v, --verbose Print various debugging information
|
||||
--dump-pages Print downloaded pages encoded using base64
|
||||
to debug problems (very verbose)
|
||||
--write-pages Write downloaded intermediary pages to
|
||||
files in the current directory to debug
|
||||
problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
-C, --call-home Contact the youtube-dl server for debugging
|
||||
--no-call-home Do NOT contact the youtube-dl server for
|
||||
debugging
|
||||
-q, --quiet Activate quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not
|
||||
write anything to disk
|
||||
--skip-download Do not download the video
|
||||
-g, --get-url Simulate, quiet but print URL
|
||||
-e, --get-title Simulate, quiet but print title
|
||||
--get-id Simulate, quiet but print id
|
||||
--get-thumbnail Simulate, quiet but print thumbnail URL
|
||||
--get-description Simulate, quiet but print video
|
||||
description
|
||||
--get-duration Simulate, quiet but print video length
|
||||
--get-filename Simulate, quiet but print output
|
||||
filename
|
||||
--get-format Simulate, quiet but print output format
|
||||
-j, --dump-json Simulate, quiet but print JSON
|
||||
information. See the "OUTPUT TEMPLATE"
|
||||
for a description of available keys.
|
||||
-J, --dump-single-json Simulate, quiet but print JSON
|
||||
information for each command-line
|
||||
argument. If the URL refers to a
|
||||
playlist, dump the whole playlist
|
||||
information in a single line.
|
||||
--print-json Be quiet and print the video
|
||||
information as JSON (video is still
|
||||
being downloaded).
|
||||
--newline Output progress bar as new lines
|
||||
--no-progress Do not print progress bar
|
||||
--console-title Display progress in console titlebar
|
||||
-v, --verbose Print various debugging information
|
||||
--dump-pages Print downloaded pages encoded using
|
||||
base64 to debug problems (very verbose)
|
||||
--write-pages Write downloaded intermediary pages to
|
||||
files in the current directory to debug
|
||||
problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
-C, --call-home Contact the youtube-dl server for
|
||||
debugging
|
||||
--no-call-home Do NOT contact the youtube-dl server
|
||||
for debugging
|
||||
|
||||
## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
--prefer-insecure Use an unencrypted connection to retrieve
|
||||
information about the video. (Currently
|
||||
supported only for YouTube)
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the video
|
||||
access is restricted to one domain
|
||||
--add-header FIELD:VALUE Specify a custom HTTP header and its value,
|
||||
separated by a colon ':'. You can use this
|
||||
option multiple times
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires bidiv
|
||||
or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download when used alone or a lower bound
|
||||
of a range for randomized sleep before each
|
||||
download (minimum possible number of
|
||||
seconds to sleep) when used along with
|
||||
--max-sleep-interval.
|
||||
--max-sleep-interval SECONDS Upper bound of a range for randomized sleep
|
||||
before each download (maximum possible
|
||||
number of seconds to sleep). Must only be
|
||||
used along with --min-sleep-interval.
|
||||
--encoding ENCODING Force the specified encoding
|
||||
(experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
--prefer-insecure Use an unencrypted connection to
|
||||
retrieve information about the video.
|
||||
(Currently supported only for YouTube)
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the
|
||||
video access is restricted to one
|
||||
domain
|
||||
--add-header FIELD:VALUE Specify a custom HTTP header and its
|
||||
value, separated by a colon ':'. You
|
||||
can use this option multiple times
|
||||
--bidi-workaround Work around terminals that lack
|
||||
bidirectional text support. Requires
|
||||
bidiv or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each
|
||||
download when used alone or a lower
|
||||
bound of a range for randomized sleep
|
||||
before each download (minimum possible
|
||||
number of seconds to sleep) when used
|
||||
along with --max-sleep-interval.
|
||||
--max-sleep-interval SECONDS Upper bound of a range for randomized
|
||||
sleep before each download (maximum
|
||||
possible number of seconds to sleep).
|
||||
Must only be used along with --min-
|
||||
sleep-interval.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT Video format code, see the "FORMAT
|
||||
SELECTION" for all the info
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer free video formats unless a specific
|
||||
one is requested
|
||||
-F, --list-formats List all available formats of requested
|
||||
videos
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifests and
|
||||
related data on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
webm, flv. Ignored if no merge is required
|
||||
-f, --format FORMAT Video format code, see the "FORMAT
|
||||
SELECTION" for all the info
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer free video formats unless a
|
||||
specific one is requested
|
||||
-F, --list-formats List all available formats of requested
|
||||
videos
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifests and
|
||||
related data on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g.
|
||||
bestvideo+bestaudio), output to given
|
||||
container format. One of mkv, mp4, ogg,
|
||||
webm, flv. Ignored if no merge is
|
||||
required
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub Write subtitle file
|
||||
--write-auto-sub Write automatically generated subtitle file
|
||||
(YouTube only)
|
||||
--all-subs Download all the available subtitles of the
|
||||
video
|
||||
--list-subs List all available subtitles for the video
|
||||
--sub-format FORMAT Subtitle format, accepts formats
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
--sub-lang LANGS Languages of the subtitles to download
|
||||
(optional) separated by commas, use --list-
|
||||
subs for available language tags
|
||||
--write-sub Write subtitle file
|
||||
--write-auto-sub Write automatically generated subtitle
|
||||
file (YouTube only)
|
||||
--all-subs Download all the available subtitles of
|
||||
the video
|
||||
--list-subs List all available subtitles for the
|
||||
video
|
||||
--sub-format FORMAT Subtitle format, accepts formats
|
||||
preference, for example: "srt" or
|
||||
"ass/srt/best"
|
||||
--sub-lang LANGS Languages of the subtitles to download
|
||||
(optional) separated by commas, use
|
||||
--list-subs for available language tags
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME Login with this account ID
|
||||
-p, --password PASSWORD Account password. If this option is left
|
||||
out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
-u, --username USERNAME Login with this account ID
|
||||
-p, --password PASSWORD Account password. If this option is
|
||||
left out, youtube-dl will ask
|
||||
interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso for
|
||||
a list of available MSOs
|
||||
--ap-username USERNAME Multiple-system operator account login
|
||||
--ap-password PASSWORD Multiple-system operator account password.
|
||||
If this option is left out, youtube-dl will
|
||||
ask interactively.
|
||||
--ap-list-mso List all supported multiple-system
|
||||
operators
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
provider) identifier, use --ap-list-mso
|
||||
for a list of available MSOs
|
||||
--ap-username USERNAME Multiple-system operator account login
|
||||
--ap-password PASSWORD Multiple-system operator account
|
||||
password. If this option is left out,
|
||||
youtube-dl will ask interactively.
|
||||
--ap-list-mso List all supported multiple-system
|
||||
operators
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio Convert video files to audio-only files
|
||||
(requires ffmpeg or avconv and ffprobe or
|
||||
avprobe)
|
||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||
"flac", "mp3", "m4a", "opus", "vorbis", or
|
||||
"wav"; "best" by default; No effect without
|
||||
-x
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert
|
||||
a value between 0 (better) and 9 (worse)
|
||||
for VBR or a specific bitrate like 128K
|
||||
(default 5)
|
||||
--recode-video FORMAT Encode the video to another format if
|
||||
necessary (currently supported:
|
||||
mp4|flv|ogg|webm|mkv|avi)
|
||||
--postprocessor-args ARGS Give these arguments to the postprocessor
|
||||
-k, --keep-video Keep the video file on disk after the post-
|
||||
processing; the video is erased by default
|
||||
--no-post-overwrites Do not overwrite post-processed files; the
|
||||
post-processed files are overwritten by
|
||||
default
|
||||
--embed-subs Embed subtitles in the video (only for mp4,
|
||||
webm and mkv videos)
|
||||
--embed-thumbnail Embed thumbnail in the audio as cover art
|
||||
--add-metadata Write metadata to the video file
|
||||
--metadata-from-title FORMAT Parse additional metadata like song title /
|
||||
artist from the video title. The format
|
||||
syntax is the same as --output. Regular
|
||||
expression with named capture groups may
|
||||
also be used. The parsed parameters replace
|
||||
existing values. Example: --metadata-from-
|
||||
title "%(artist)s - %(title)s" matches a
|
||||
title like "Coldplay - Paradise". Example
|
||||
(regex): --metadata-from-title
|
||||
"(?P<artist>.+?) - (?P<title>.+)"
|
||||
--xattrs Write metadata to the video file's xattrs
|
||||
(using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the
|
||||
file. One of never (do nothing), warn (only
|
||||
emit a warning), detect_or_warn (the
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors (default)
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading and post-processing, similar to
|
||||
find's -exec syntax. Example: --exec 'adb
|
||||
push {} /sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT Convert the subtitles to other format
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
-x, --extract-audio Convert video files to audio-only files
|
||||
(requires ffmpeg/avconv and
|
||||
ffprobe/avprobe)
|
||||
--audio-format FORMAT Specify audio format: "best", "aac",
|
||||
"flac", "mp3", "m4a", "opus", "vorbis",
|
||||
or "wav"; "best" by default; No effect
|
||||
without -x
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality,
|
||||
insert a value between 0 (better) and 9
|
||||
(worse) for VBR or a specific bitrate
|
||||
like 128K (default 5)
|
||||
--recode-video FORMAT Encode the video to another format if
|
||||
necessary (currently supported:
|
||||
mp4|flv|ogg|webm|mkv|avi)
|
||||
--postprocessor-args ARGS Give these arguments to the
|
||||
postprocessor
|
||||
-k, --keep-video Keep the video file on disk after the
|
||||
post-processing; the video is erased by
|
||||
default
|
||||
--no-post-overwrites Do not overwrite post-processed files;
|
||||
the post-processed files are
|
||||
overwritten by default
|
||||
--embed-subs Embed subtitles in the video (only for
|
||||
mp4, webm and mkv videos)
|
||||
--embed-thumbnail Embed thumbnail in the audio as cover
|
||||
art
|
||||
--add-metadata Write metadata to the video file
|
||||
--metadata-from-title FORMAT Parse additional metadata like song
|
||||
title / artist from the video title.
|
||||
The format syntax is the same as
|
||||
--output. Regular expression with named
|
||||
capture groups may also be used. The
|
||||
parsed parameters replace existing
|
||||
values. Example: --metadata-from-title
|
||||
"%(artist)s - %(title)s" matches a
|
||||
title like "Coldplay - Paradise".
|
||||
Example (regex): --metadata-from-title
|
||||
"(?P<artist>.+?) - (?P<title>.+)"
|
||||
--xattrs Write metadata to the video file's
|
||||
xattrs (using dublin core and xdg
|
||||
standards)
|
||||
--fixup POLICY Automatically correct known faults of
|
||||
the file. One of never (do nothing),
|
||||
warn (only emit a warning),
|
||||
detect_or_warn (the default; fix file
|
||||
if we can, warn otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running
|
||||
the postprocessors
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running
|
||||
the postprocessors (default)
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading and post-processing,
|
||||
similar to find's -exec syntax.
|
||||
Example: --exec 'adb push {}
|
||||
/sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT Convert the subtitles to other format
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
@@ -583,7 +620,7 @@ Available for the media that is a track or a part of a music album:
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
- `release_year` (numeric): Year (YYYY) when the album was released
|
||||
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
|
||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||
|
||||
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
|
||||
|
||||
|
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
|
||||
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
|
||||
$HOME/jython/bin/jython -m pip install nose
|
@@ -46,14 +46,16 @@
|
||||
- **Amara**
|
||||
- **AMCNetworks**
|
||||
- **AmericasTestKitchen**
|
||||
- **AmericasTestKitchenSeason**
|
||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **AnimeOnDemand**
|
||||
- **Anvato**
|
||||
- **aol.com**
|
||||
- **aol.com**: Yahoo screen and movies
|
||||
- **APA**
|
||||
- **Aparat**
|
||||
- **AppleConnect**
|
||||
- **AppleDaily**: 臺灣蘋果日報
|
||||
- **ApplePodcasts**
|
||||
- **appletrailers**
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
@@ -98,6 +100,10 @@
|
||||
- **BellMedia**
|
||||
- **Bet**
|
||||
- **bfi:player**
|
||||
- **bfmtv**
|
||||
- **bfmtv:article**
|
||||
- **bfmtv:live**
|
||||
- **BibelTV**
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili**
|
||||
@@ -187,8 +193,6 @@
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralFullEpisodes**
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
@@ -336,6 +340,8 @@
|
||||
- **Go**
|
||||
- **GodTube**
|
||||
- **Golem**
|
||||
- **google:podcasts**
|
||||
- **google:podcasts:feed**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
- **GPUTechConf**
|
||||
@@ -370,6 +376,8 @@
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **ign.com**
|
||||
- **IHeartRadio**
|
||||
- **iheartradio:podcast**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
@@ -409,7 +417,8 @@
|
||||
- **Katsomo**
|
||||
- **KeezMovies**
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **khanacademy**
|
||||
- **khanacademy:unit**
|
||||
- **KickStarter**
|
||||
- **KinjaEmbed**
|
||||
- **KinoPoisk**
|
||||
@@ -496,6 +505,9 @@
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **MiaoPai**
|
||||
- **minds**
|
||||
- **minds:channel**
|
||||
- **minds:group**
|
||||
- **MinistryGrid**
|
||||
- **Minoto**
|
||||
- **miomio.tv**
|
||||
@@ -691,7 +703,6 @@
|
||||
- **Playwire**
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
- **plus.google**: Google Plus
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
@@ -850,6 +861,8 @@
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **spotify**
|
||||
- **spotify:show**
|
||||
- **Spreaker**
|
||||
- **SpreakerPage**
|
||||
- **SpreakerShow**
|
||||
@@ -862,6 +875,7 @@
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **StitcherShow**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
@@ -930,12 +944,13 @@
|
||||
- **TNAFlixNetworkEmbed**
|
||||
- **toggle**
|
||||
- **ToonGoggles**
|
||||
- **Tosh**: Tosh.0
|
||||
- **tou.tv**
|
||||
- **Toypics**: Toypics video
|
||||
- **ToypicsUser**: Toypics user profile
|
||||
- **TrailerAddict** (Currently broken)
|
||||
- **Trilulilu**
|
||||
- **Trovo**
|
||||
- **TrovoVod**
|
||||
- **TruNews**
|
||||
- **TruTV**
|
||||
- **Tube8**
|
||||
@@ -1123,7 +1138,7 @@
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XHamsterUser**
|
||||
|
@@ -464,6 +464,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
assert_syntax_error('+bestaudio')
|
||||
assert_syntax_error('bestvideo+')
|
||||
assert_syntax_error('/')
|
||||
assert_syntax_error('bestvideo+bestvideo+bestaudio')
|
||||
|
||||
def test_format_filtering(self):
|
||||
formats = [
|
||||
@@ -632,13 +633,20 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'title2': '%PATH%',
|
||||
}
|
||||
|
||||
def fname(templ):
|
||||
ydl = YoutubeDL({'outtmpl': templ})
|
||||
def fname(templ, na_placeholder='NA'):
|
||||
params = {'outtmpl': templ}
|
||||
if na_placeholder != 'NA':
|
||||
params['outtmpl_na_placeholder'] = na_placeholder
|
||||
ydl = YoutubeDL(params)
|
||||
return ydl.prepare_filename(info)
|
||||
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
|
||||
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
|
||||
# Replace missing fields with 'NA'
|
||||
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
|
||||
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
|
||||
# Replace missing fields with 'NA' by default
|
||||
self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
|
||||
# Or by provided placeholder
|
||||
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
|
||||
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
|
||||
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
|
||||
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
|
||||
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')
|
||||
|
@@ -258,16 +258,24 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
|
||||
|
||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiPlayIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
def test_subtitles_key(self):
|
||||
self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
|
||||
|
||||
def test_subtitles_array_key(self):
|
||||
self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
|
||||
|
||||
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||
|
@@ -163,6 +163,7 @@ class YoutubeDL(object):
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code. See options.py for more information.
|
||||
outtmpl: Template for output names.
|
||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
force_generic_extractor: Force downloader to use the generic extractor
|
||||
@@ -338,6 +339,8 @@ class YoutubeDL(object):
|
||||
_pps = []
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
_playlist_level = 0
|
||||
_playlist_urls = set()
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params=None, auto_init=True):
|
||||
@@ -656,7 +659,7 @@ class YoutubeDL(object):
|
||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||
for k, v in template_dict.items()
|
||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
||||
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
|
||||
|
||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||
|
||||
@@ -676,8 +679,8 @@ class YoutubeDL(object):
|
||||
|
||||
# Missing numeric fields used together with integer presentation types
|
||||
# in format specification will break the argument substitution since
|
||||
# string 'NA' is returned for missing fields. We will patch output
|
||||
# template for missing fields to meet string presentation type.
|
||||
# string NA placeholder is returned for missing fields. We will patch
|
||||
# output template for missing fields to meet string presentation type.
|
||||
for numeric_field in self._NUMERIC_FIELDS:
|
||||
if numeric_field not in template_dict:
|
||||
# As of [1] format syntax is:
|
||||
@@ -906,115 +909,23 @@ class YoutubeDL(object):
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type in ('playlist', 'multi_video'):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
# Protect from infinite recursion due to recursively nested playlists
|
||||
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||
webpage_url = ie_result['webpage_url']
|
||||
if webpage_url in self._playlist_urls:
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
'[download] Skipping already downloaded playlist: %s'
|
||||
% ie_result.get('title') or ie_result.get('id'))
|
||||
return
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
self._playlist_level += 1
|
||||
self._playlist_urls.add(webpage_url)
|
||||
try:
|
||||
return self.__process_playlist(ie_result, download)
|
||||
finally:
|
||||
self._playlist_level -= 1
|
||||
if not self._playlist_level:
|
||||
self._playlist_urls.clear()
|
||||
elif result_type == 'compat_list':
|
||||
self.report_warning(
|
||||
'Extractor %s returned a compat_list result. '
|
||||
@@ -1039,6 +950,118 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def __process_playlist(self, ie_result, download):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __process_iterable_entry(self, entry, download, extra_info):
|
||||
return self.process_ie_result(
|
||||
@@ -1226,6 +1249,8 @@ class YoutubeDL(object):
|
||||
group = _parse_format_selection(tokens, inside_group=True)
|
||||
current_selector = FormatSelector(GROUP, group, [])
|
||||
elif string == '+':
|
||||
if inside_merge:
|
||||
raise syntax_error('Unexpected "+"', start)
|
||||
video_selector = current_selector
|
||||
audio_selector = _parse_format_selection(tokens, inside_merge=True)
|
||||
if not video_selector or not audio_selector:
|
||||
@@ -1777,6 +1802,8 @@ class YoutubeDL(object):
|
||||
os.makedirs(dn)
|
||||
return True
|
||||
except (OSError, IOError) as err:
|
||||
if isinstance(err, OSError) and err.errno == errno.EEXIST:
|
||||
return True
|
||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||
return False
|
||||
|
||||
|
@@ -340,6 +340,7 @@ def _real_main(argv=None):
|
||||
'format': opts.format,
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
'autonumber_start': opts.autonumber_start,
|
||||
'restrictfilenames': opts.restrictfilenames,
|
||||
|
@@ -172,8 +172,12 @@ class HlsFD(FragmentFD):
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if not test:
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
|
@@ -10,6 +10,7 @@ import random
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
@@ -18,11 +19,13 @@ from ..utils import (
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,16 +34,27 @@ class ADNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
'series': 'Blue Exorcist - Kyôto Saga',
|
||||
'duration': 1467,
|
||||
'release_date': '20170106',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'season_number': 2,
|
||||
'episode': 'Début des hostilités',
|
||||
'episode_number': 1,
|
||||
}
|
||||
}
|
||||
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
@@ -54,26 +68,24 @@ class ADNIE(InfoExtractor):
|
||||
def _ass_subtitles_timecode(seconds):
|
||||
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
||||
|
||||
def _get_subtitles(self, sub_path, video_id):
|
||||
if not sub_path:
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, sub_path),
|
||||
video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, subtitle_location),
|
||||
video_id, 'Downloading subtitles data', fatal=False,
|
||||
headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
subtitle_location, video_id, 'Downloading subtitles data',
|
||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -119,59 +131,76 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_config = self._parse_json(self._search_regex(
|
||||
r'playerConfig\s*=\s*({.+});', webpage,
|
||||
'player config', default='{}'), video_id, fatal=False)
|
||||
if not player_config:
|
||||
config_url = urljoin(self._BASE_URL, self._search_regex(
|
||||
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
|
||||
webpage, 'config url'))
|
||||
player_config = self._download_json(
|
||||
config_url, video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
options = player['options']
|
||||
|
||||
video_info = {}
|
||||
video_info_str = self._search_regex(
|
||||
r'videoInfo\s*=\s*({.+});', webpage,
|
||||
'video info', fatal=False)
|
||||
if video_info_str:
|
||||
video_info = self._parse_json(
|
||||
video_info_str, video_id, fatal=False) or {}
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
raise ExtractorError(
|
||||
'This video is only available for paying users', expected=True)
|
||||
# self.raise_login_required() # FIXME: Login is not implemented
|
||||
|
||||
options = player_config.get('options') or {}
|
||||
metas = options.get('metas') or {}
|
||||
links = player_config.get('links') or {}
|
||||
sub_path = player_config.get('subtitles')
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
token = options['token']
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
'e': 60,
|
||||
't': token,
|
||||
}))
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
links_data = self._download_json(
|
||||
urljoin(self._BASE_URL, links_url), video_id,
|
||||
'Downloading links JSON metadata', headers={
|
||||
'Authorization': 'Bearer ' + authorization,
|
||||
})
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles') or \
|
||||
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
|
||||
sub_path += '&token=' + token
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web'
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError):
|
||||
raise e
|
||||
|
||||
if e.cause.code == 401:
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
else:
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
raise ExtractorError('Giving up retrying')
|
||||
|
||||
links = links_data.get('links') or {}
|
||||
metas = links_data.get('metadata') or {}
|
||||
sub_url = (links.get('subtitles') or {}).get('all')
|
||||
video_info = links_data.get('video') or {}
|
||||
title = metas['title']
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
for format_id, qualities in (links.get('streaming') or {}).items():
|
||||
if not isinstance(qualities, dict):
|
||||
continue
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
@@ -189,19 +218,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
formats.extend(m3u8_formats)
|
||||
if not error:
|
||||
error = options.get('error')
|
||||
if not formats and error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'description': strip_or_none(metas.get('summary') or video.get('summary')),
|
||||
'thumbnail': video_info.get('image') or player.get('image'),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||
'series': video_info.get('playlistTitle'),
|
||||
'subtitles': self.extract_subtitles(sub_url, video_id),
|
||||
'episode': metas.get('subtitle') or video.get('name'),
|
||||
'episode_number': int_or_none(video.get('shortNumber')),
|
||||
'series': show.get('title'),
|
||||
'season_number': int_or_none(video.get('season')),
|
||||
'duration': int_or_none(video_info.get('duration') or video.get('duration')),
|
||||
'release_date': unified_strdate(video.get('releaseDate')),
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
@@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
'playlist_mincount': 168,
|
||||
'playlist_mincount': 150,
|
||||
}]
|
||||
_RESOURCE = 'series'
|
||||
_ITEMS_KEY = 'episodes'
|
||||
|
@@ -1,13 +1,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
'ext': 'mp4',
|
||||
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
post_type, name = re.match(self._VALID_URL, url).groups()
|
||||
post_type = {
|
||||
'features': 'post',
|
||||
'program': 'episode',
|
||||
'videos': 'video',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
'https://www.aljazeera.com/graphql', name, query={
|
||||
'operationName': 'SingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': name,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
'wp-site': 'aje',
|
||||
})['data']['article']['video']
|
||||
video_id = video['id']
|
||||
account_id = video.get('accountId') or '665003303001'
|
||||
player_id = video.get('playerId') or 'BkeSH5BDb'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
|
@@ -1,13 +1,16 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'timestamp': 1523318400,
|
||||
'upload_date': '20180410',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
@@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
|
||||
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
|
||||
'md5': '06451608c57651e985a498e69cec17e5',
|
||||
'info_dict': {
|
||||
'id': '5fbe8c61bda2010001c6763b',
|
||||
'title': 'Simple Chicken Dinner',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1610755200,
|
||||
'upload_date': '20210116',
|
||||
'release_date': '20210116',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 21,
|
||||
'episode': 'Simple Chicken Dinner',
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
@@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
'release_date': unified_strdate(video.get('publishDate')),
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'series': try_get(episode, lambda x: x['show']['title']),
|
||||
'episode': episode.get('title'),
|
||||
}
|
||||
|
||||
|
||||
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# ATK Season
|
||||
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||
'info_dict': {
|
||||
'id': 'season_1',
|
||||
'title': 'Season 1',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# Cooks Country Season
|
||||
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||
'info_dict': {
|
||||
'id': 'season_12',
|
||||
'title': 'Season 12',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_name, season_number = re.match(self._VALID_URL, url).groups()
|
||||
season_number = int(season_number)
|
||||
|
||||
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
||||
|
||||
season = 'Season %d' % season_number
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
season, headers={
|
||||
'Origin': 'https://www.%s.com' % show_name,
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps([
|
||||
'search_season_list:' + season,
|
||||
'search_document_klass:episode',
|
||||
'search_show_slug:' + slug,
|
||||
]),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode in (season_search.get('hits') or []):
|
||||
search_url = episode.get('search_url')
|
||||
if not search_url:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
||||
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||
}
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), 'season_%d' % season_number, season)
|
||||
|
@@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||
webpage, 'anime description', default=None)
|
||||
|
||||
entries = []
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
@@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
yield f
|
||||
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
@@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'title': m.group('title'),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
yield f
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
@@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
for e in extract_entries(episode_html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
@@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
for e in extract_entries(html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
extract_episodes(webpage)
|
||||
def entries():
|
||||
has_episodes = False
|
||||
for e in extract_episodes(webpage):
|
||||
has_episodes = True
|
||||
yield e
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
if not has_episodes:
|
||||
for e in extract_film(webpage, anime_id):
|
||||
yield e
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
return self.playlist_result(
|
||||
entries(), anime_id, anime_title, anime_description)
|
||||
|
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .yahoo import YahooIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
@@ -15,9 +15,9 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class AolIE(InfoExtractor):
|
||||
class AolIE(YahooIE):
|
||||
IE_NAME = 'aol.com'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
|
||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
# video with 5min ID
|
||||
@@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Yahoo video
|
||||
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if '-' in video_id:
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
|
@@ -187,13 +187,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._html_search_regex(
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
@@ -249,18 +249,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
# available till 7.01.2022
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||
'info_dict': {
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'display_id': 'maischberger-die-woche',
|
||||
'id': '100',
|
||||
'ext': 'mp4',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'duration': 3687.0,
|
||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||
'upload_date': '20210107',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
@@ -315,17 +315,17 @@ class ARDIE(InfoExtractor):
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': '70153354',
|
||||
'id': '78566716',
|
||||
'title': 'Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||
'timestamp': 1577047500,
|
||||
'upload_date': '20191222',
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
|
||||
'timestamp': 1596658200,
|
||||
'upload_date': '20200805',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
|
103
youtube_dl/extractor/bfmtv.py
Normal file
103
youtube_dl/extractor/bfmtv.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
|
||||
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _brightcove_url_result(self, video_id, video_block):
|
||||
account_id = video_block.get('accountid') or '876450612001'
|
||||
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
|
||||
|
||||
class BFMTVIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
|
||||
'info_dict': {
|
||||
'id': '6196747868001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
|
||||
'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20201002',
|
||||
'timestamp': 1601629620,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
video_block = extract_attributes(self._search_regex(
|
||||
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE):
|
||||
IE_NAME = 'bfmtv:live'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/en-direct/',
|
||||
'info_dict': {
|
||||
'id': '5615950982001',
|
||||
'ext': 'mp4',
|
||||
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader_id': '876450610001',
|
||||
'upload_date': '20171018',
|
||||
'timestamp': 1508329950,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bfmtv.com/economie/en-direct/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
|
||||
IE_NAME = 'bfmtv:article'
|
||||
_VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
|
||||
'info_dict': {
|
||||
'id': '202101060198',
|
||||
'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
|
||||
'description': 'md5:947974089c303d3ac6196670ae262843',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
bfmtv_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, bfmtv_id)
|
||||
|
||||
entries = []
|
||||
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||
video_block = extract_attributes(video_block_el)
|
||||
video_id = video_block.get('videoid')
|
||||
if not video_id:
|
||||
continue
|
||||
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
30
youtube_dl/extractor/bibeltv.py
Normal file
30
youtube_dl/extractor/bibeltv.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BibelTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
|
||||
'md5': '252f908192d611de038b8504b08bf97f',
|
||||
'info_dict': {
|
||||
'id': 'ref:329703',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprachkurs in Malaiisch',
|
||||
'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
|
||||
'timestamp': 1608316701,
|
||||
'uploader_id': '5840105145001',
|
||||
'upload_date': '20201218',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
crn_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
|
@@ -7,12 +7,12 @@ from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
@@ -37,6 +37,7 @@ class CanvasIE(InfoExtractor):
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8',
|
||||
@@ -47,29 +48,34 @@ class CanvasIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
data = None
|
||||
if site_id != 'vrtvideo':
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
|
||||
# New API endpoint
|
||||
if not data:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({'Content-Type': 'application/json'})
|
||||
token = self._download_json(
|
||||
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||
'Downloading token', data=b'',
|
||||
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
|
||||
'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
|
||||
data = self._download_json(
|
||||
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||
video_id, 'Downloading video JSON', fatal=False, query={
|
||||
video_id, 'Downloading video JSON', query={
|
||||
'vrtPlayerToken': token,
|
||||
'client': '%s@PROD' % site_id,
|
||||
}, expected_status=400)
|
||||
message = data.get('message')
|
||||
if message and not data.get('title'):
|
||||
if data.get('code') == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
if not data.get('title'):
|
||||
code = data.get('code')
|
||||
if code == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required()
|
||||
elif code == 'INVALID_LOCATION':
|
||||
self.raise_geo_restricted(countries=['BE'])
|
||||
raise ExtractorError(data.get('message') or code, expected=True)
|
||||
|
||||
title = data['title']
|
||||
description = data.get('description')
|
||||
@@ -205,20 +211,24 @@ class CanvasEenIE(InfoExtractor):
|
||||
|
||||
class VrtNUIE(GigyaBaseIE):
|
||||
IE_DESC = 'VrtNU.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# Available via old API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'ext': 'mp4',
|
||||
'title': 'De zwarte weduwe',
|
||||
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
|
||||
'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
|
||||
'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
|
||||
'duration': 1457.04,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Postbus X',
|
||||
'season': 'Seizoen 1989',
|
||||
'season_number': 1989,
|
||||
'episode': 'De zwarte weduwe',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1595822400,
|
||||
'upload_date': '20200727',
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
@@ -300,69 +310,25 @@ class VrtNUIE(GigyaBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<nui-media[^>]+>)', webpage, 'media element'))
|
||||
video_id = attrs['videoid']
|
||||
publication_id = attrs.get('publicationid')
|
||||
if publication_id:
|
||||
video_id = publication_id + '$' + video_id
|
||||
|
||||
page = (self._parse_json(self._search_regex(
|
||||
r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
|
||||
default='{}'), video_id, fatal=False) or {}).get('page') or {}
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
season = self._html_search_regex(
|
||||
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
|
||||
<span>seizoen\ (.+?)</span>\s*
|
||||
</div>''',
|
||||
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
|
||||
webpage, 'season', default=None)
|
||||
|
||||
season_number = int_or_none(season)
|
||||
|
||||
episode_number = int_or_none(self._html_search_regex(
|
||||
r'''(?xms)<div\ class="content__episode">\s*
|
||||
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
|
||||
</div>''',
|
||||
webpage, 'episode_number', default=None))
|
||||
|
||||
release_date = parse_iso8601(self._html_search_regex(
|
||||
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
|
||||
webpage, 'release_date', default=None))
|
||||
|
||||
# If there's a ? or a # in the URL, remove them and everything after
|
||||
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
|
||||
securevideo_url = clean_url + '.mssecurevideo.json'
|
||||
|
||||
try:
|
||||
video = self._download_json(securevideo_url, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
# We are dealing with a '../<show>.relevant' URL
|
||||
redirect_url = video.get('url')
|
||||
if redirect_url:
|
||||
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
|
||||
|
||||
# There is only one entry, but with an unknown key, so just get
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
'season_number': int_or_none(page.get('episode_season')),
|
||||
})
|
||||
|
@@ -1,142 +1,51 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .mtv import MTVServicesInfoExtractor
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
|
||||
/(?P<title>.*)'''
|
||||
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
||||
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
||||
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
|
||||
'md5': 'b8acb347177c680ff18a292aa2166f80',
|
||||
'info_dict': {
|
||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
||||
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
|
||||
'ext': 'mp4',
|
||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
||||
'timestamp': 1376798400,
|
||||
'upload_date': '20130818',
|
||||
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
|
||||
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
|
||||
'timestamp': 1598670000,
|
||||
'upload_date': '20200829',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
||||
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
||||
(?:full-episodes|shows(?=/[^/]+/full-episodes))
|
||||
/(?P<id>[^?]+)'''
|
||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
|
||||
'info_dict': {
|
||||
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
|
||||
'title': 'November 28, 2016 - Ryan Speedo Green',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}, {
|
||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
|
||||
videos_info = self._get_videos_info(mgid)
|
||||
return videos_info
|
||||
|
||||
|
||||
class ToshIE(MTVServicesInfoExtractor):
|
||||
IE_DESC = 'Tosh.0'
|
||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
||||
'info_dict': {
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'title': 'Twitter Users Share Summer Plans',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
||||
'info_dict': {
|
||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
||||
'description': 'Tosh asked fans to share their summer plans.',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
# It's really reported to be published on year 2077
|
||||
'upload_date': '20770610',
|
||||
'timestamp': 3390510600,
|
||||
'subtitles': {
|
||||
'en': 'mincount:3',
|
||||
},
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
||||
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
||||
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
|
||||
'info_dict': {
|
||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
||||
'ext': 'flv',
|
||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
||||
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
|
||||
'ext': 'mp4',
|
||||
'title': 'Josh Investigates',
|
||||
'description': 'Steht uns das Ende der Welt bevor?',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
mrss_url = self._search_regex(
|
||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'mrss url', group='url')
|
||||
|
||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
||||
|
||||
|
||||
class ComedyCentralShortnameIE(InfoExtractor):
|
||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
||||
_TESTS = [{
|
||||
'url': ':tds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':thedailyshow',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': ':theopposition',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
shortcut_map = {
|
||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'accountOverride': 'intl.mtvi.com',
|
||||
'arcEp': 'web.cc.tv',
|
||||
'ep': 'b9032c3a',
|
||||
'imageEp': 'web.cc.tv',
|
||||
'mgid': uri,
|
||||
}
|
||||
return self.url_result(shortcut_map[video_id])
|
||||
|
@@ -8,11 +8,14 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
@@ -116,8 +119,30 @@ class CSpanIE(InfoExtractor):
|
||||
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||
base_url=url)
|
||||
add_referer(info['formats'])
|
||||
for subtitles in info['subtitles'].values():
|
||||
for subtitle in subtitles:
|
||||
ext = determine_ext(subtitle['url'])
|
||||
if ext == 'php':
|
||||
ext = 'vtt'
|
||||
subtitle['ext'] = ext
|
||||
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||
return merge_dicts(info, ld_info)
|
||||
title = get_element_by_class('video-page-title', webpage) or \
|
||||
self._og_search_title(webpage)
|
||||
description = get_element_by_attribute('itemprop', 'description', webpage) or \
|
||||
self._html_search_meta(['og:description', 'description'], webpage)
|
||||
return merge_dicts(info, ld_info, {
|
||||
'title': title,
|
||||
'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
|
||||
'description': description,
|
||||
'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
|
||||
'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
|
||||
'duration': int_or_none(self._search_regex(
|
||||
r'jwsetup\.seclength\s*=\s*(\d+);',
|
||||
webpage, 'duration', fatal=False)),
|
||||
'view_count': str_to_int(self._search_regex(
|
||||
r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
|
||||
webpage, 'views', fatal=False)),
|
||||
})
|
||||
|
||||
# Obsolete
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
|
@@ -17,7 +17,12 @@ from ..utils import (
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
|
||||
(?:www\.)?(?P<host>d
|
||||
(?:
|
||||
play\.(?P<country>dk|fi|jp|se|no)|
|
||||
iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
|
||||
)
|
||||
)|
|
||||
(?P<subdomain_country>es|it)\.dplay\.com
|
||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||
|
||||
@@ -126,6 +131,24 @@ class DPlayIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.dplay.jp/video/gold-rush/24086',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||
@@ -241,7 +264,7 @@ class DPlayIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain').lstrip('www.')
|
||||
country = mobj.group('country') or mobj.group('subdomain_country')
|
||||
host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
|
||||
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
|
||||
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
@@ -42,7 +42,10 @@ from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .americastestkitchen import AmericasTestKitchenIE
|
||||
from .americastestkitchen import (
|
||||
AmericasTestKitchenIE,
|
||||
AmericasTestKitchenSeasonIE,
|
||||
)
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anvato import AnvatoIE
|
||||
from .aol import AolIE
|
||||
@@ -101,6 +104,12 @@ from .bellmedia import BellMediaIE
|
||||
from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bfi import BFIPlayerIE
|
||||
from .bfmtv import (
|
||||
BFMTVIE,
|
||||
BFMTVLiveIE,
|
||||
BFMTVArticleIE,
|
||||
)
|
||||
from .bibeltv import BibelTVIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import (
|
||||
@@ -226,11 +235,8 @@ from .cnn import (
|
||||
)
|
||||
from .coub import CoubIE
|
||||
from .comedycentral import (
|
||||
ComedyCentralFullEpisodesIE,
|
||||
ComedyCentralIE,
|
||||
ComedyCentralShortnameIE,
|
||||
ComedyCentralTVIE,
|
||||
ToshIE,
|
||||
)
|
||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .commonprotocols import (
|
||||
@@ -520,7 +526,10 @@ from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
)
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
@@ -642,6 +651,11 @@ from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyIE,
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsIE,
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
)
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
@@ -1107,6 +1121,10 @@ from .stitcher import (
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .spotify import (
|
||||
SpotifyIE,
|
||||
SpotifyShowIE,
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
@@ -1220,6 +1238,10 @@ from .toutv import TouTvIE
|
||||
from .toypics import ToypicsUserIE, ToypicsIE
|
||||
from .traileraddict import TrailerAddictIE
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovo import (
|
||||
TrovoIE,
|
||||
TrovoVodIE,
|
||||
)
|
||||
from .trunews import TruNewsIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class FranceCultureIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||
'info_dict': {
|
||||
'id': 'rendez-vous-au-pays-des-geeks',
|
||||
@@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
|
||||
'title': 'Rendez-vous au pays des geeks',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140301',
|
||||
'timestamp': 1393642916,
|
||||
'timestamp': 1393700400,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# no thumbnail
|
||||
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
|
||||
</h1>|
|
||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||
).*?
|
||||
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
||||
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
||||
''',
|
||||
webpage, 'video data'))
|
||||
|
||||
video_url = video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
||||
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
||||
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||
webpage, 'description', default=None)
|
||||
thumbnail = self._search_regex(
|
||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
webpage, 'thumbnail', default=None)
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<span class="author">(.*?)</span>',
|
||||
webpage, 'uploader', default=None)
|
||||
@@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
|
||||
'ext': ext,
|
||||
'vcodec': 'none' if ext == 'mp3' else None,
|
||||
'uploader': uploader,
|
||||
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
||||
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
||||
'duration': int_or_none(video_data.get('data-duration')),
|
||||
}
|
||||
|
@@ -35,7 +35,7 @@ class IHeartRadioIE(IHeartRadioBaseIE):
|
||||
'id': '70346499',
|
||||
'ext': 'mp3',
|
||||
'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
|
||||
'description': 'md5:66480b2d25ec93a5f60c0faa3275ce5c',
|
||||
'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
|
||||
'timestamp': 1597741200,
|
||||
'upload_date': '20200818',
|
||||
}
|
||||
|
@@ -2,92 +2,71 @@ from __future__ import unicode_literals
|
||||
|
||||
from .canvas import CanvasIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class KetnetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
|
||||
'md5': '6bdeb65998930251bbd1c510750edba9',
|
||||
'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
|
||||
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
|
||||
'info_dict': {
|
||||
'id': 'zomerse-filmpjes',
|
||||
'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
|
||||
'description': 'Gluur mee met Ghost Rockers op de filmset',
|
||||
'title': 'Nachtwacht - Reeks 3: Aflevering 1',
|
||||
'description': 'De Nachtwacht krijgt te maken met een parasiet',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
# mzid in playerConfig instead of sources
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
'duration': 1468.02,
|
||||
'timestamp': 1609225200,
|
||||
'upload_date': '20201229',
|
||||
'series': 'Nachtwacht',
|
||||
'season': 'Reeks 3',
|
||||
'episode': 'De Greystook',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# mzsource, geo restricted to Belgium
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
|
||||
'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video = self._download_json(
|
||||
'https://senior-bff.ketnet.be/graphql', display_id, query={
|
||||
'query': '''{
|
||||
video(id: "content/ketnet/nl/%s.model.json") {
|
||||
description
|
||||
episodeNr
|
||||
imageUrl
|
||||
mediaReference
|
||||
programTitle
|
||||
publicationDate
|
||||
seasonTitle
|
||||
subtitleVideodetail
|
||||
titleVideodetail
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['video']
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
|
||||
'player config'),
|
||||
video_id)
|
||||
|
||||
mzid = config.get('mzid')
|
||||
if mzid:
|
||||
return self.url_result(
|
||||
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
|
||||
CanvasIE.ie_key(), video_id=mzid)
|
||||
|
||||
title = config['title']
|
||||
|
||||
formats = []
|
||||
for source_key in ('', 'mz'):
|
||||
source = config.get('%ssource' % source_key)
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
for format_id, format_url in source.items():
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif format_id == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
mz_id = compat_urllib_parse_unquote(video['mediaReference'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': config.get('description'),
|
||||
'thumbnail': config.get('image'),
|
||||
'series': config.get('program'),
|
||||
'episode': config.get('episode'),
|
||||
'formats': formats,
|
||||
'_type': 'url_transparent',
|
||||
'id': mz_id,
|
||||
'title': video['titleVideodetail'],
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
|
||||
'thumbnail': video.get('imageUrl'),
|
||||
'description': video.get('description'),
|
||||
'timestamp': parse_iso8601(video.get('publicationDate')),
|
||||
'series': video.get('programTitle'),
|
||||
'season': video.get('seasonTitle'),
|
||||
'episode': video.get('subtitleVideodetail'),
|
||||
'episode_number': int_or_none(video.get('episodeNr')),
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
}
|
||||
|
@@ -1,82 +1,107 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class KhanAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
IE_NAME = 'KhanAcademy'
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.khanacademy.org/video/one-time-pad',
|
||||
'md5': '7b391cce85e758fb94f763ddc1bbb979',
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video['youtubeId'],
|
||||
'id': video.get('slug'),
|
||||
'title': video.get('title'),
|
||||
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'description': video.get('description'),
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
component_props = self._parse_json(self._download_json(
|
||||
'https://www.khanacademy.org/api/internal/graphql',
|
||||
display_id, query={
|
||||
'hash': 1604303425,
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'queryParams': '',
|
||||
}),
|
||||
})['data']['contentJson'], display_id)['componentProps']
|
||||
return self._parse_component_props(component_props)
|
||||
|
||||
|
||||
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy'
|
||||
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||
'info_dict': {
|
||||
'id': 'one-time-pad',
|
||||
'ext': 'webm',
|
||||
'id': 'FlIG3TvQCBQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'upload_date': '20120411',
|
||||
'timestamp': 1334170113,
|
||||
'license': 'cc-by-nc-sa',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
||||
}
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
video = component_props['tutorialPageData']['contentModel']
|
||||
info = self._parse_video(video)
|
||||
author_names = video.get('authorNames')
|
||||
info.update({
|
||||
'uploader': ', '.join(author_names) if author_names else None,
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'license': video.get('kaUserLicense'),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy:unit'
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||
'info_dict': {
|
||||
'id': 'cryptography',
|
||||
'title': 'Journey into cryptography',
|
||||
'title': 'Cryptography',
|
||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
'playlist_mincount': 31,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
def _parse_component_props(self, component_props):
|
||||
curation = component_props['curation']
|
||||
|
||||
if m.group('key') == 'video':
|
||||
data = self._download_json(
|
||||
'http://api.khanacademy.org/api/v1/videos/' + video_id,
|
||||
video_id, 'Downloading video info')
|
||||
|
||||
upload_date = unified_strdate(data['date_added'])
|
||||
uploader = ', '.join(data['author_names'])
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': data['url'],
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['image_url'],
|
||||
'duration': data['duration'],
|
||||
'description': data['description'],
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
entries = []
|
||||
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||
chapter_info = {
|
||||
'chapter': tutorial.get('title'),
|
||||
'chapter_number': tutorial_number,
|
||||
'chapter_id': tutorial.get('id'),
|
||||
}
|
||||
else:
|
||||
# topic
|
||||
data = self._download_json(
|
||||
'http://api.khanacademy.org/api/v1/topic/' + video_id,
|
||||
video_id, 'Downloading topic info')
|
||||
for content_item in (tutorial.get('contentItems') or []):
|
||||
if content_item.get('kind') == 'Video':
|
||||
info = self._parse_video(content_item)
|
||||
info.update(chapter_info)
|
||||
entries.append(info)
|
||||
|
||||
entries = [
|
||||
{
|
||||
'_type': 'url',
|
||||
'url': c['url'],
|
||||
'id': c['id'],
|
||||
'title': c['title'],
|
||||
}
|
||||
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'entries': entries,
|
||||
}
|
||||
return self.playlist_result(
|
||||
entries, curation.get('unit'), curation.get('title'),
|
||||
curation.get('description'))
|
||||
|
@@ -5,7 +5,10 @@ import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -139,6 +145,7 @@ class LBRYIE(LBRYBaseIE):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
display_id = display_id.replace(':', '#')
|
||||
display_id = compat_urllib_parse_unquote(display_id)
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
result_value = result['value']
|
||||
|
196
youtube_dl/extractor/minds.py
Normal file
196
youtube_dl/extractor/minds.py
Normal file
@@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MindsBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
|
||||
|
||||
def _call_api(self, path, video_id, resource, query=None):
|
||||
api_url = 'https://www.minds.com/api/' + path
|
||||
token = self._get_cookies(api_url).get('XSRF-TOKEN')
|
||||
return self._download_json(
|
||||
api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
|
||||
'Referer': 'https://www.minds.com/',
|
||||
'X-XSRF-TOKEN': token.value if token else '',
|
||||
}, query=query)
|
||||
|
||||
|
||||
class MindsIE(MindsBaseIE):
|
||||
IE_NAME = 'minds'
|
||||
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.minds.com/media/100000000000086822',
|
||||
'md5': '215a658184a419764852239d4970b045',
|
||||
'info_dict': {
|
||||
'id': '100000000000086822',
|
||||
'ext': 'mp4',
|
||||
'title': 'Minds intro sequence',
|
||||
'thumbnail': r're:https?://.+\.png',
|
||||
'uploader_id': 'ottman',
|
||||
'upload_date': '20130524',
|
||||
'timestamp': 1369404826,
|
||||
'uploader': 'Bill Ottman',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'tags': ['animation'],
|
||||
'comment_count': int,
|
||||
'license': 'attribution-cc',
|
||||
},
|
||||
}, {
|
||||
# entity.type == 'activity' and empty title
|
||||
'url': 'https://www.minds.com/newsfeed/798025111988506624',
|
||||
'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
|
||||
'info_dict': {
|
||||
'id': '798022190320226304',
|
||||
'ext': 'mp4',
|
||||
'title': '798022190320226304',
|
||||
'uploader': 'ColinFlaherty',
|
||||
'upload_date': '20180111',
|
||||
'timestamp': 1515639316,
|
||||
'uploader_id': 'ColinFlaherty',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.minds.com/archive/view/715172106794442752',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# youtube perma_url
|
||||
'url': 'https://www.minds.com/newsfeed/1197131838022602752',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
entity_id = self._match_id(url)
|
||||
entity = self._call_api(
|
||||
'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
|
||||
if entity.get('type') == 'activity':
|
||||
if entity.get('custom_type') == 'video':
|
||||
video_id = entity['entity_guid']
|
||||
else:
|
||||
return self.url_result(entity['perma_url'])
|
||||
else:
|
||||
assert(entity['subtype'] == 'video')
|
||||
video_id = entity_id
|
||||
# 1080p and webm formats available only on the sources array
|
||||
video = self._call_api(
|
||||
'v2/media/video/' + video_id, video_id, 'video')
|
||||
|
||||
formats = []
|
||||
for source in (video.get('sources') or []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': source.get('label'),
|
||||
'height': int_or_none(source.get('size')),
|
||||
'url': src,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
entity = video.get('entity') or entity
|
||||
owner = entity.get('ownerObj') or {}
|
||||
uploader_id = owner.get('username')
|
||||
|
||||
tags = entity.get('tags')
|
||||
if tags and isinstance(tags, compat_str):
|
||||
tags = [tags]
|
||||
|
||||
thumbnail = None
|
||||
poster = video.get('poster') or entity.get('thumbnail_src')
|
||||
if poster:
|
||||
urlh = self._request_webpage(poster, video_id, fatal=False)
|
||||
if urlh:
|
||||
thumbnail = urlh.geturl()
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': entity.get('title') or video_id,
|
||||
'formats': formats,
|
||||
'description': clean_html(entity.get('description')) or None,
|
||||
'license': str_or_none(entity.get('license')),
|
||||
'timestamp': int_or_none(entity.get('time_created')),
|
||||
'uploader': strip_or_none(owner.get('name')),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
|
||||
'view_count': int_or_none(entity.get('play:count')),
|
||||
'like_count': int_or_none(entity.get('thumbs:up:count')),
|
||||
'dislike_count': int_or_none(entity.get('thumbs:down:count')),
|
||||
'tags': tags,
|
||||
'comment_count': int_or_none(entity.get('comments:count')),
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class MindsFeedBaseIE(MindsBaseIE):
|
||||
_PAGE_SIZE = 150
|
||||
|
||||
def _entries(self, feed_id):
|
||||
query = {'limit': self._PAGE_SIZE, 'sync': 1}
|
||||
i = 1
|
||||
while True:
|
||||
data = self._call_api(
|
||||
'v2/feeds/container/%s/videos' % feed_id,
|
||||
feed_id, 'page %s' % i, query)
|
||||
entities = data.get('entities') or []
|
||||
for entity in entities:
|
||||
guid = entity.get('guid')
|
||||
if not guid:
|
||||
continue
|
||||
yield self.url_result(
|
||||
'https://www.minds.com/newsfeed/' + guid,
|
||||
MindsIE.ie_key(), guid)
|
||||
query['from_timestamp'] = data['load-next']
|
||||
if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
|
||||
break
|
||||
i += 1
|
||||
|
||||
def _real_extract(self, url):
|
||||
feed_id = self._match_id(url)
|
||||
feed = self._call_api(
|
||||
'v1/%s/%s' % (self._FEED_PATH, feed_id),
|
||||
feed_id, self._FEED_TYPE)[self._FEED_TYPE]
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(feed['guid']), feed_id,
|
||||
strip_or_none(feed.get('name')),
|
||||
feed.get('briefdescription'))
|
||||
|
||||
|
||||
class MindsChannelIE(MindsFeedBaseIE):
|
||||
_FEED_TYPE = 'channel'
|
||||
IE_NAME = 'minds:' + _FEED_TYPE
|
||||
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
|
||||
_FEED_PATH = 'channel'
|
||||
_TEST = {
|
||||
'url': 'https://www.minds.com/ottman',
|
||||
'info_dict': {
|
||||
'id': 'ottman',
|
||||
'title': 'Bill Ottman',
|
||||
'description': 'Co-creator & CEO @minds',
|
||||
},
|
||||
'playlist_mincount': 54,
|
||||
}
|
||||
|
||||
|
||||
class MindsGroupIE(MindsFeedBaseIE):
|
||||
_FEED_TYPE = 'group'
|
||||
IE_NAME = 'minds:' + _FEED_TYPE
|
||||
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
|
||||
_FEED_PATH = 'groups/group'
|
||||
_TEST = {
|
||||
'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
|
||||
'info_dict': {
|
||||
'id': '785582576369672204',
|
||||
'title': 'Cooking Videos',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}
|
@@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
|
||||
cloudcast_url = cloudcast.get('url')
|
||||
if not cloudcast_url:
|
||||
continue
|
||||
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
||||
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
||||
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
|
||||
entries.append(self.url_result(
|
||||
cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
|
||||
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
||||
|
||||
page_info = items['pageInfo']
|
||||
has_next_page = page_info['hasNextPage']
|
||||
@@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
_DESCRIPTION_KEY = 'biog'
|
||||
_ROOT_TYPE = 'user'
|
||||
_NODE_TEMPLATE = '''slug
|
||||
url'''
|
||||
url
|
||||
owner { username }'''
|
||||
|
||||
def _get_playlist_title(self, title, slug):
|
||||
return '%s (%s)' % (title, slug)
|
||||
@@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
_NODE_TEMPLATE = '''cloudcast {
|
||||
slug
|
||||
url
|
||||
owner { username }
|
||||
}'''
|
||||
|
||||
def _get_cloudcast(self, node):
|
||||
|
@@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):
|
||||
# no keywords
|
||||
'url': 'http://motherless.com/8B4BBC1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# see https://motherless.com/videos/recent for recent videos with
|
||||
# uploaded date in "ago" format
|
||||
'url': 'https://motherless.com/3C3E2CF',
|
||||
'info_dict': {
|
||||
'id': '3C3E2CF',
|
||||
'ext': 'mp4',
|
||||
'title': 'a/ Hot Teens',
|
||||
'categories': list,
|
||||
'upload_date': '20210104',
|
||||
'uploader_id': 'yonbiw',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -85,20 +102,28 @@ class MotherlessIE(InfoExtractor):
|
||||
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||
(r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||
(r'>([\d,.]+)\s+Favorites<',
|
||||
r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||
webpage, 'like count', fatal=False))
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
||||
if 'Ago' in upload_date:
|
||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
||||
else:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
|
||||
'upload date', default=None))
|
||||
if not upload_date:
|
||||
uploaded_ago = self._search_regex(
|
||||
r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
|
||||
default=None)
|
||||
if uploaded_ago:
|
||||
delta = int(uploaded_ago[:-1])
|
||||
_AGO_UNITS = {
|
||||
'h': 'hours',
|
||||
'd': 'days',
|
||||
}
|
||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
|
||||
comment_count = webpage.count('class="media-comment-contents"')
|
||||
uploader_id = self._html_search_regex(
|
||||
|
@@ -253,6 +253,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
|
||||
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
||||
|
||||
@staticmethod
|
||||
def _extract_child_with_type(parent, t):
|
||||
return next(c for c in parent['children'] if c.get('type') == t)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
try:
|
||||
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
||||
@@ -278,6 +282,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
if not mgid:
|
||||
mgid = self._extract_triforce_mgid(webpage)
|
||||
|
||||
if not mgid:
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||
mgid = video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
return mgid
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -349,18 +360,6 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def extract_child_with_type(parent, t):
|
||||
children = parent['children']
|
||||
return next(c for c in children if c.get('type') == t)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self.extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
||||
return video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
|
||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtvjapan'
|
||||
|
@@ -1,104 +1,125 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
||||
_TEST = {
|
||||
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||
'info_dict': {
|
||||
'id': 'kXzwOKyGlSA',
|
||||
'id': 'ae5Ag7B',
|
||||
'ext': 'mp4',
|
||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
||||
'uploader': 'CompilationChannel',
|
||||
'upload_date': '20131110',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
||||
'info_dict': {
|
||||
'id': 'aKolP3',
|
||||
'ext': 'mp4',
|
||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
||||
'uploader_id': 'rickmereki',
|
||||
'uploader': 'Rick Mereki',
|
||||
'upload_date': '20110803',
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/p/KklwM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.tv/p/Kk2X5',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_EXTERNAL_VIDEO_PROVIDER = {
|
||||
'1': {
|
||||
'url': '%s',
|
||||
'ie_key': 'Youtube',
|
||||
},
|
||||
'2': {
|
||||
'url': 'http://player.vimeo.com/video/%s',
|
||||
'ie_key': 'Vimeo',
|
||||
},
|
||||
'3': {
|
||||
'url': 'http://instagram.com/p/%s',
|
||||
'ie_key': 'Instagram',
|
||||
},
|
||||
'4': {
|
||||
'url': 'http://vine.co/v/%s',
|
||||
'ie_key': 'Vine',
|
||||
},
|
||||
'title': 'Capybara Agility Training',
|
||||
'upload_date': '20191108',
|
||||
'timestamp': 1573237208,
|
||||
'categories': ['Awesome'],
|
||||
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
|
||||
'duration': 44,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
post_id = self._match_id(url)
|
||||
post = self._download_json(
|
||||
'https://9gag.com/v1/post', post_id, query={
|
||||
'id': post_id
|
||||
})['data']['post']
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
if post.get('type') != 'Animated':
|
||||
raise ExtractorError(
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
post_view = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
||||
webpage, 'post view'),
|
||||
display_id)
|
||||
title = post['title']
|
||||
|
||||
ie_key = None
|
||||
source_url = post_view.get('sourceUrl')
|
||||
if not source_url:
|
||||
external_video_id = post_view['videoExternalId']
|
||||
external_video_provider = post_view['videoExternalProvider']
|
||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
||||
title = post_view['title']
|
||||
description = post_view.get('description')
|
||||
view_count = str_to_int(post_view.get('externalView'))
|
||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
||||
duration = None
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for key, image in (post.get('images') or {}).items():
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
ext = determine_ext(image_url)
|
||||
image_id = key.strip('image')
|
||||
common = {
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
}
|
||||
if ext in ('jpg', 'png'):
|
||||
webp_url = image.get('webpUrl')
|
||||
if webp_url:
|
||||
t = common.copy()
|
||||
t.update({
|
||||
'id': image_id + '-webp',
|
||||
'url': webp_url,
|
||||
})
|
||||
thumbnails.append(t)
|
||||
common.update({
|
||||
'id': image_id,
|
||||
'ext': ext,
|
||||
})
|
||||
thumbnails.append(common)
|
||||
elif ext in ('webm', 'mp4'):
|
||||
if not duration:
|
||||
duration = int_or_none(image.get('duration'))
|
||||
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
|
||||
for vcodec in ('vp8', 'vp9', 'h265'):
|
||||
c_url = image.get(vcodec + 'Url')
|
||||
if not c_url:
|
||||
continue
|
||||
c_f = common.copy()
|
||||
c_f.update({
|
||||
'format_id': image_id + '-' + vcodec,
|
||||
'url': c_url,
|
||||
'vcodec': vcodec,
|
||||
})
|
||||
formats.append(c_f)
|
||||
common.update({
|
||||
'ext': ext,
|
||||
'format_id': image_id,
|
||||
})
|
||||
formats.append(common)
|
||||
self._sort_formats(formats)
|
||||
|
||||
section = try_get(post, lambda x: x['postSection']['name'])
|
||||
|
||||
tags = None
|
||||
post_tags = post.get('tags')
|
||||
if post_tags:
|
||||
tags = []
|
||||
for tag in post_tags:
|
||||
tag_key = tag.get('key')
|
||||
if not tag_key:
|
||||
continue
|
||||
tags.append(tag_key)
|
||||
|
||||
get_count = lambda x: int_or_none(post.get(x + 'Count'))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': source_url,
|
||||
'ie_key': ie_key,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'id': post_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'view_count': view_count,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': int_or_none(post.get('creationTs')),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'like_count': get_count('upVote'),
|
||||
'dislike_count': get_count('downVote'),
|
||||
'comment_count': get_count('comments'),
|
||||
'age_limit': 18 if post.get('nsfw') == 1 else None,
|
||||
'categories': [section] if section else None,
|
||||
'tags': tags,
|
||||
}
|
||||
|
@@ -6,30 +6,40 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class NJPWWorldIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
|
||||
IE_DESC = '新日本プロレスワールド'
|
||||
_NETRC_MACHINE = 'njpwworld'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
|
||||
'info_dict': {
|
||||
'id': 's_series_00155_1_9',
|
||||
'ext': 'mp4',
|
||||
'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
|
||||
'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
|
||||
'tags': list,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # AES-encrypted m3u8
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
||||
'info_dict': {
|
||||
'id': 's_series_00563_16_bs',
|
||||
'ext': 'mp4',
|
||||
'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
|
||||
'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||
|
||||
@@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = []
|
||||
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
|
||||
player = extract_attributes(mobj.group(0))
|
||||
player_path = player.get('href')
|
||||
if not player_path:
|
||||
continue
|
||||
kind = self._search_regex(
|
||||
r'(low|high)$', player.get('class') or '', 'kind',
|
||||
default='low')
|
||||
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
|
||||
player_path = '/intent?id=%s&type=url' % vid
|
||||
player_url = compat_urlparse.urljoin(url, player_path)
|
||||
player_page = self._download_webpage(
|
||||
player_url, video_id, note='Downloading player page')
|
||||
entries = self._parse_html5_media_entries(
|
||||
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
|
||||
m3u8_entry_protocol='m3u8_native')
|
||||
kind_formats = entries[0]['formats']
|
||||
for f in kind_formats:
|
||||
f['quality'] = 2 if kind == 'high' else 1
|
||||
formats.extend(kind_formats)
|
||||
formats.append({
|
||||
'url': player_url,
|
||||
'format_id': kind,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'quality': 2 if kind == 'high' else 1,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
post_content = get_element_by_class('post-content', webpage)
|
||||
tag_block = get_element_by_class('tag-block', webpage)
|
||||
tags = re.findall(
|
||||
r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
|
||||
) if post_content else None
|
||||
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
|
||||
) if tag_block else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
|
@@ -450,6 +450,18 @@ class PeerTubeIE(InfoExtractor):
|
||||
'tags': ['framasoft', 'peertube'],
|
||||
'categories': ['Science & Technology'],
|
||||
}
|
||||
}, {
|
||||
# Issue #26002
|
||||
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||
'info_dict': {
|
||||
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dot matrix printer shell demo',
|
||||
'uploader_id': '3',
|
||||
'timestamp': 1587401293,
|
||||
'upload_date': '20200420',
|
||||
'uploader': 'Drew DeVault',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||
'only_matching': True,
|
||||
@@ -526,7 +538,15 @@ class PeerTubeIE(InfoExtractor):
|
||||
title = video['name']
|
||||
|
||||
formats = []
|
||||
for file_ in video['files']:
|
||||
files = video.get('files') or []
|
||||
for playlist in (video.get('streamingPlaylists') or []):
|
||||
if not isinstance(playlist, dict):
|
||||
continue
|
||||
playlist_files = playlist.get('files')
|
||||
if not (playlist_files and isinstance(playlist_files, list)):
|
||||
continue
|
||||
files.extend(playlist_files)
|
||||
for file_ in files:
|
||||
if not isinstance(file_, dict):
|
||||
continue
|
||||
file_url = url_or_none(file_.get('fileUrl'))
|
||||
|
@@ -103,22 +103,28 @@ class RaiBaseIE(InfoExtractor):
|
||||
}.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
def _extract_subtitles(url, subtitle_url):
|
||||
def _extract_subtitles(url, video_data):
|
||||
STL_EXT = 'stl'
|
||||
SRT_EXT = 'srt'
|
||||
subtitles = {}
|
||||
if subtitle_url and isinstance(subtitle_url, compat_str):
|
||||
subtitle_url = urljoin(url, subtitle_url)
|
||||
STL_EXT = '.stl'
|
||||
SRT_EXT = '.srt'
|
||||
subtitles['it'] = [{
|
||||
'ext': 'stl',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
if subtitle_url.endswith(STL_EXT):
|
||||
srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'].append({
|
||||
'ext': 'srt',
|
||||
'url': srt_url,
|
||||
subtitles_array = video_data.get('subtitlesArray') or []
|
||||
for k in ('subtitles', 'subtitlesUrl'):
|
||||
subtitles_array.append({'url': video_data.get(k)})
|
||||
for subtitle in subtitles_array:
|
||||
sub_url = subtitle.get('url')
|
||||
if sub_url and isinstance(sub_url, compat_str):
|
||||
sub_lang = subtitle.get('language') or 'it'
|
||||
sub_url = urljoin(url, sub_url)
|
||||
sub_ext = determine_ext(sub_url, SRT_EXT)
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': sub_ext,
|
||||
'url': sub_url,
|
||||
})
|
||||
if STL_EXT == sub_ext:
|
||||
subtitles[sub_lang].append({
|
||||
'ext': SRT_EXT,
|
||||
'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
|
||||
@@ -138,6 +144,9 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'duration': 6160,
|
||||
'series': 'Report',
|
||||
'season': '2013/14',
|
||||
'subtitles': {
|
||||
'it': 'count:2',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -145,6 +154,10 @@ class RaiPlayIE(RaiBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subtitles at 'subtitlesArray' key (see #27698)
|
||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -172,7 +185,7 @@ class RaiPlayIE(RaiBaseIE):
|
||||
if date_published and time_published:
|
||||
date_published += ' ' + time_published
|
||||
|
||||
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
||||
subtitles = self._extract_subtitles(url, video)
|
||||
|
||||
program_info = media.get('program_info') or {}
|
||||
season = media.get('season')
|
||||
@@ -326,6 +339,22 @@ class RaiIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
|
||||
'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
|
||||
'info_dict': {
|
||||
'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
|
||||
'description': 'md5:d291b03407ec505f95f27970c0b025f4',
|
||||
'upload_date': '20150913',
|
||||
'subtitles': {
|
||||
'it': 'count:2',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Direct MMS URL
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||
@@ -366,7 +395,7 @@ class RaiIE(RaiBaseIE):
|
||||
'url': compat_urlparse.urljoin(url, thumbnail_url),
|
||||
})
|
||||
|
||||
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
|
||||
subtitles = self._extract_subtitles(url, media)
|
||||
|
||||
info = {
|
||||
'id': content_id,
|
||||
@@ -403,7 +432,8 @@ class RaiIE(RaiBaseIE):
|
||||
r'''(?x)
|
||||
(?:
|
||||
(?:initEdizione|drawMediaRaiTV)\(|
|
||||
<(?:[^>]+\bdata-id|var\s+uniquename)=
|
||||
<(?:[^>]+\bdata-id|var\s+uniquename)=|
|
||||
<iframe[^>]+\bsrc=
|
||||
)
|
||||
(["\'])
|
||||
(?:(?!\1).)*\bContentItem-(?P<id>%s)
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -18,7 +18,7 @@ class SBSIE(InfoExtractor):
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
|
||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||
'info_dict': {
|
||||
'id': '320403011771',
|
||||
'id': '_rFBPRPO4pMR',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dingo Conservation (The Feed)',
|
||||
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
||||
@@ -34,6 +34,15 @@ class SBSIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -20,9 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
|
||||
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
return self._extract_triforce_mgid(webpage)
|
||||
|
||||
|
||||
class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
|
||||
@@ -40,16 +37,12 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
root_data = self._parse_json(self._search_regex(
|
||||
r'window\.__DATA__\s*=\s*({.+})',
|
||||
webpage, 'data'), None)
|
||||
|
||||
def find_sub_data(data, data_type):
|
||||
return next(c for c in data['children'] if c.get('type') == data_type)
|
||||
|
||||
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
|
||||
return c['props']['media']['video']['config']['uri']
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'arcEp': 'paramountnetwork.com',
|
||||
'imageEp': 'paramountnetwork.com',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
156
youtube_dl/extractor/spotify.py
Normal file
156
youtube_dl/extractor/spotify.py
Normal file
@@ -0,0 +1,156 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyBaseIE(InfoExtractor):
|
||||
_ACCESS_TOKEN = None
|
||||
_OPERATION_HASHES = {
|
||||
'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
|
||||
'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
|
||||
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
|
||||
}
|
||||
_VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._ACCESS_TOKEN = self._download_json(
|
||||
'https://open.spotify.com/get_access_token', None)['accessToken']
|
||||
|
||||
def _call_api(self, operation, video_id, variables):
|
||||
return self._download_json(
|
||||
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
|
||||
'operationName': 'query' + operation,
|
||||
'variables': json.dumps(variables),
|
||||
'extensions': json.dumps({
|
||||
'persistedQuery': {
|
||||
'sha256Hash': self._OPERATION_HASHES[operation],
|
||||
},
|
||||
})
|
||||
}, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
|
||||
|
||||
def _extract_episode(self, episode, series):
|
||||
episode_id = episode['id']
|
||||
title = episode['name'].strip()
|
||||
|
||||
formats = []
|
||||
audio_preview = episode.get('audioPreview') or {}
|
||||
audio_preview_url = audio_preview.get('url')
|
||||
if audio_preview_url:
|
||||
f = {
|
||||
'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
|
||||
'vcodec': 'none',
|
||||
}
|
||||
audio_preview_format = audio_preview.get('format')
|
||||
if audio_preview_format:
|
||||
f['format_id'] = audio_preview_format
|
||||
mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
|
||||
if mobj:
|
||||
f.update({
|
||||
'abr': int(mobj.group(2)),
|
||||
'ext': mobj.group(1).lower(),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
for item in (try_get(episode, lambda x: x['audio']['items']) or []):
|
||||
item_url = item.get('url')
|
||||
if not (item_url and item.get('externallyHosted')):
|
||||
continue
|
||||
formats.append({
|
||||
'url': clean_podcast_url(item_url),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
|
||||
source_url = source.get('url')
|
||||
if not source_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': source_url,
|
||||
'width': int_or_none(source.get('width')),
|
||||
'height': int_or_none(source.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': strip_or_none(episode.get('description')),
|
||||
'duration': float_or_none(try_get(
|
||||
episode, lambda x: x['duration']['totalMilliseconds']), 1000),
|
||||
'release_date': unified_strdate(try_get(
|
||||
episode, lambda x: x['releaseDate']['isoString'])),
|
||||
'series': series,
|
||||
}
|
||||
|
||||
|
||||
class SpotifyIE(SpotifyBaseIE):
|
||||
IE_NAME = 'spotify'
|
||||
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
|
||||
_TEST = {
|
||||
'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
|
||||
'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
|
||||
'info_dict': {
|
||||
'id': '4Z7GAJ50bgctf6uclHlWKo',
|
||||
'ext': 'mp3',
|
||||
'title': 'From the archive: Why time management is ruining our lives',
|
||||
'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
|
||||
'duration': 2083.605,
|
||||
'release_date': '20201217',
|
||||
'series': "The Guardian's Audio Long Reads",
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
episode = self._call_api('Episode', episode_id, {
|
||||
'uri': 'spotify:episode:' + episode_id
|
||||
})['episode']
|
||||
return self._extract_episode(
|
||||
episode, try_get(episode, lambda x: x['podcast']['name']))
|
||||
|
||||
|
||||
class SpotifyShowIE(SpotifyBaseIE):
|
||||
IE_NAME = 'spotify:show'
|
||||
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
|
||||
_TEST = {
|
||||
'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
|
||||
'info_dict': {
|
||||
'id': '4PM9Ke6l66IRNpottHKV9M',
|
||||
'title': 'The Story from the Guardian',
|
||||
'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
|
||||
},
|
||||
'playlist_mincount': 36,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
podcast = self._call_api('ShowEpisodes', show_id, {
|
||||
'limit': 1000000000,
|
||||
'offset': 0,
|
||||
'uri': 'spotify:show:' + show_id,
|
||||
})['podcast']
|
||||
podcast_name = podcast.get('name')
|
||||
|
||||
entries = []
|
||||
for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
|
||||
episode = item.get('episode')
|
||||
if not episode:
|
||||
continue
|
||||
entries.append(self._extract_episode(episode, podcast_name))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_id, podcast_name, podcast.get('description'))
|
@@ -3,10 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -15,29 +18,35 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
IE_DESC = '3Q SDN'
|
||||
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
|
||||
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
|
||||
'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
|
||||
# https://player.3qsdn.com/demo.html
|
||||
'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
|
||||
'md5': '64a57396b16fa011b15e0ea60edce918',
|
||||
'info_dict': {
|
||||
'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
||||
'id': '7201c779-6b3c-11e7-a40e-002590c750be',
|
||||
'ext': 'mp4',
|
||||
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
||||
'title': 'Video Ads',
|
||||
'is_live': False,
|
||||
'description': 'Video Ads Demo',
|
||||
'timestamp': 1500334803,
|
||||
'upload_date': '20170717',
|
||||
'duration': 888.032,
|
||||
'subtitles': {
|
||||
'eng': 'count:1',
|
||||
},
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
|
||||
'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
|
||||
}, {
|
||||
# live video stream
|
||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||
'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be',
|
||||
'info_dict': {
|
||||
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
||||
'id': '66e68995-11ca-11e8-9273-002590c750be',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
}, {
|
||||
# live audio stream
|
||||
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
|
||||
@@ -58,6 +67,14 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
# live video with rtmp link
|
||||
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
|
||||
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# live video stream
|
||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -70,73 +87,78 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
js = self._download_webpage(
|
||||
'http://playout.3qsdn.com/%s' % video_id, video_id,
|
||||
query={'js': 'true'})
|
||||
try:
|
||||
config = self._download_json(
|
||||
url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
self.raise_geo_restricted()
|
||||
raise
|
||||
|
||||
if any(p in js for p in (
|
||||
'>This content is not available in your country',
|
||||
'playout.3qsdn.com/forbidden')):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
stream_content = self._search_regex(
|
||||
r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
|
||||
'stream content', default='demand', group='content')
|
||||
|
||||
live = stream_content == 'live'
|
||||
|
||||
stream_type = self._search_regex(
|
||||
r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
|
||||
'stream type', default='video', group='type')
|
||||
live = config.get('streamContent') == 'live'
|
||||
aspect = float_or_none(config.get('aspect'))
|
||||
|
||||
formats = []
|
||||
urls = set()
|
||||
|
||||
def extract_formats(item_url, item={}):
|
||||
if not item_url or item_url in urls:
|
||||
return
|
||||
urls.add(item_url)
|
||||
ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
item_url, video_id, mpd_id='mpd', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
item_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
item_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
if not self._is_valid_url(item_url, video_id):
|
||||
return
|
||||
formats.append({
|
||||
'url': item_url,
|
||||
'format_id': item.get('quality'),
|
||||
'ext': 'mp4' if item_url.startswith('rtsp') else ext,
|
||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
||||
})
|
||||
|
||||
for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
||||
f = self._parse_json(
|
||||
item_js, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not f:
|
||||
for source_type, source in (config.get('sources') or {}).items():
|
||||
if not source:
|
||||
continue
|
||||
extract_formats(f.get('src'), f)
|
||||
if source_type == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source, video_id, mpd_id='mpd', fatal=False))
|
||||
elif source_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'progressive':
|
||||
for s in source:
|
||||
src = s.get('src')
|
||||
if not (src and self._is_valid_url(src, video_id)):
|
||||
continue
|
||||
width = None
|
||||
format_id = ['http']
|
||||
ext = determine_ext(src)
|
||||
if ext:
|
||||
format_id.append(ext)
|
||||
height = int_or_none(s.get('height'))
|
||||
if height:
|
||||
format_id.append('%dp' % height)
|
||||
if aspect:
|
||||
width = int(height * aspect)
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'format_id': '-'.join(format_id),
|
||||
'height': height,
|
||||
'source_preference': 0,
|
||||
'url': src,
|
||||
'vcodec': 'none' if height == 0 else None,
|
||||
'width': width,
|
||||
})
|
||||
for f in formats:
|
||||
if f.get('acodec') == 'none':
|
||||
f['preference'] = -40
|
||||
elif f.get('vcodec') == 'none':
|
||||
f['preference'] = -50
|
||||
self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id'))
|
||||
|
||||
# More relaxed version to collect additional URLs and acting
|
||||
# as a future-proof fallback
|
||||
for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
|
||||
extract_formats(src)
|
||||
subtitles = {}
|
||||
for subtitle in (config.get('subtitles') or []):
|
||||
src = subtitle.get('src')
|
||||
if not src:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('label') or 'eng', []).append({
|
||||
'url': src,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._live_title(video_id) if live else video_id
|
||||
title = config.get('title') or video_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if live else title,
|
||||
'thumbnail': config.get('poster') or None,
|
||||
'description': config.get('description') or None,
|
||||
'timestamp': parse_iso8601(config.get('upload_date')),
|
||||
'duration': float_or_none(config.get('vlength')) or None,
|
||||
'is_live': live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
193
youtube_dl/extractor/trovo.py
Normal file
193
youtube_dl/extractor/trovo.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TrovoBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
|
||||
|
||||
def _extract_streamer_info(self, data):
|
||||
streamer_info = data.get('streamerInfo') or {}
|
||||
username = streamer_info.get('userName')
|
||||
return {
|
||||
'uploader': streamer_info.get('nickName'),
|
||||
'uploader_id': str_or_none(streamer_info.get('uid')),
|
||||
'uploader_url': 'https://trovo.live/' + username if username else None,
|
||||
}
|
||||
|
||||
|
||||
class TrovoIE(TrovoBaseIE):
|
||||
_VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
username = self._match_id(url)
|
||||
live_info = self._download_json(
|
||||
'https://gql.trovo.live/', username, query={
|
||||
'query': '''{
|
||||
getLiveInfo(params: {userName: "%s"}) {
|
||||
isLive
|
||||
programInfo {
|
||||
coverUrl
|
||||
id
|
||||
streamInfo {
|
||||
desc
|
||||
playUrl
|
||||
}
|
||||
title
|
||||
}
|
||||
streamerInfo {
|
||||
nickName
|
||||
uid
|
||||
userName
|
||||
}
|
||||
}
|
||||
}''' % username,
|
||||
})['data']['getLiveInfo']
|
||||
if live_info.get('isLive') == 0:
|
||||
raise ExtractorError('%s is offline' % username, expected=True)
|
||||
program_info = live_info['programInfo']
|
||||
program_id = program_info['id']
|
||||
title = self._live_title(program_info['title'])
|
||||
|
||||
formats = []
|
||||
for stream_info in (program_info.get('streamInfo') or []):
|
||||
play_url = stream_info.get('playUrl')
|
||||
if not play_url:
|
||||
continue
|
||||
format_id = stream_info.get('desc')
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id[:-1]) if format_id else None,
|
||||
'url': play_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': program_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': program_info.get('coverUrl'),
|
||||
'is_live': True,
|
||||
}
|
||||
info.update(self._extract_streamer_info(live_info))
|
||||
return info
|
||||
|
||||
|
||||
class TrovoVodIE(TrovoBaseIE):
|
||||
_VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
|
||||
'info_dict': {
|
||||
'id': 'ltv-100095501_100095501_1609596043',
|
||||
'ext': 'mp4',
|
||||
'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
|
||||
'uploader': 'Exsl',
|
||||
'timestamp': 1609640305,
|
||||
'upload_date': '20210103',
|
||||
'uploader_id': '100095501',
|
||||
'duration': 43977,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': 'mincount:8',
|
||||
'categories': ['Grand Theft Auto V'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://trovo.live/clip/lc-5285890810184026005',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
vid = self._match_id(url)
|
||||
resp = self._download_json(
|
||||
'https://gql.trovo.live/', vid, data=json.dumps([{
|
||||
'query': '''{
|
||||
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
|
||||
VodDetailInfos
|
||||
}
|
||||
}''' % vid,
|
||||
}, {
|
||||
'query': '''{
|
||||
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
|
||||
commentList {
|
||||
author {
|
||||
nickName
|
||||
uid
|
||||
}
|
||||
commentID
|
||||
content
|
||||
createdAt
|
||||
parentID
|
||||
}
|
||||
}
|
||||
}''' % vid,
|
||||
}]).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
|
||||
vod_info = vod_detail_info['vodInfo']
|
||||
title = vod_info['title']
|
||||
|
||||
language = vod_info.get('languageName')
|
||||
formats = []
|
||||
for play_info in (vod_info.get('playInfos') or []):
|
||||
play_url = play_info.get('playUrl')
|
||||
if not play_url:
|
||||
continue
|
||||
format_id = play_info.get('desc')
|
||||
formats.append({
|
||||
'ext': 'mp4',
|
||||
'filesize': int_or_none(play_info.get('fileSize')),
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id[:-1]) if format_id else None,
|
||||
'language': language,
|
||||
'protocol': 'm3u8_native',
|
||||
'tbr': int_or_none(play_info.get('bitrate')),
|
||||
'url': play_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
category = vod_info.get('categoryName')
|
||||
get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
|
||||
|
||||
comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
|
||||
comments = []
|
||||
for comment in comment_list:
|
||||
content = comment.get('content')
|
||||
if not content:
|
||||
continue
|
||||
author = comment.get('author') or {}
|
||||
parent = comment.get('parentID')
|
||||
comments.append({
|
||||
'author': author.get('nickName'),
|
||||
'author_id': str_or_none(author.get('uid')),
|
||||
'id': str_or_none(comment.get('commentID')),
|
||||
'text': content,
|
||||
'timestamp': int_or_none(comment.get('createdAt')),
|
||||
'parent': 'root' if parent == 0 else str_or_none(parent),
|
||||
})
|
||||
|
||||
info = {
|
||||
'id': vid,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': vod_info.get('coverUrl'),
|
||||
'timestamp': int_or_none(vod_info.get('publishTs')),
|
||||
'duration': int_or_none(vod_info.get('duration')),
|
||||
'view_count': get_count('watch'),
|
||||
'like_count': get_count('like'),
|
||||
'comment_count': get_count('comment'),
|
||||
'comments': comments,
|
||||
'categories': [category] if category else None,
|
||||
}
|
||||
info.update(self._extract_streamer_info(vod_detail_info))
|
||||
return info
|
@@ -9,7 +9,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
@@ -18,6 +17,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -42,30 +42,16 @@ class TwitchBaseIE(InfoExtractor):
|
||||
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
return
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, path, item_id, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {}).copy()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
})
|
||||
kwargs.update({
|
||||
'headers': headers,
|
||||
'expected_status': (400, 410),
|
||||
})
|
||||
response = self._download_json(
|
||||
'%s/%s' % (self._API_BASE, path), item_id,
|
||||
*args, **compat_kwargs(kwargs))
|
||||
self._handle_error(response)
|
||||
return response
|
||||
_OPERATION_HASHES = {
|
||||
'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
|
||||
'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
|
||||
'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
|
||||
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
|
||||
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
|
||||
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
|
||||
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
|
||||
'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -91,14 +77,14 @@ class TwitchBaseIE(InfoExtractor):
|
||||
|
||||
headers = {
|
||||
'Referer': page_url,
|
||||
'Origin': page_url,
|
||||
'Origin': 'https://www.twitch.tv',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
post_url, None, note, data=json.dumps(form).encode(),
|
||||
headers=headers, expected_status=400)
|
||||
error = response.get('error_description') or response.get('error_code')
|
||||
error = dict_get(response, ('error', 'error_description', 'error_code'))
|
||||
if error:
|
||||
fail(error)
|
||||
|
||||
@@ -151,13 +137,50 @@ class TwitchBaseIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
def _download_access_token(self, channel_name):
|
||||
return self._call_api(
|
||||
'api/channels/%s/access_token' % channel_name, channel_name,
|
||||
'Downloading access token JSON')
|
||||
def _download_base_gql(self, video_id, ops, note, fatal=True):
|
||||
headers = {
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
}
|
||||
gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
|
||||
if gql_auth:
|
||||
headers['Authorization'] = 'OAuth ' + gql_auth.value
|
||||
return self._download_json(
|
||||
'https://gql.twitch.tv/gql', video_id, note,
|
||||
data=json.dumps(ops).encode(),
|
||||
headers=headers, fatal=fatal)
|
||||
|
||||
def _extract_channel_id(self, token, channel_name):
|
||||
return compat_str(self._parse_json(token, channel_name)['channel_id'])
|
||||
def _download_gql(self, video_id, ops, note, fatal=True):
|
||||
for op in ops:
|
||||
op['extensions'] = {
|
||||
'persistedQuery': {
|
||||
'version': 1,
|
||||
'sha256Hash': self._OPERATION_HASHES[op['operationName']],
|
||||
}
|
||||
}
|
||||
return self._download_base_gql(video_id, ops, note)
|
||||
|
||||
def _download_access_token(self, video_id, token_kind, param_name):
|
||||
method = '%sPlaybackAccessToken' % token_kind
|
||||
ops = {
|
||||
'query': '''{
|
||||
%s(
|
||||
%s: "%s",
|
||||
params: {
|
||||
platform: "web",
|
||||
playerBackend: "mediaplayer",
|
||||
playerType: "site"
|
||||
}
|
||||
)
|
||||
{
|
||||
value
|
||||
signature
|
||||
}
|
||||
}''' % (method, param_name, video_id),
|
||||
}
|
||||
return self._download_base_gql(
|
||||
video_id, ops,
|
||||
'Downloading %s access token GraphQL' % token_kind)['data'][method]
|
||||
|
||||
|
||||
class TwitchVodIE(TwitchBaseIE):
|
||||
@@ -170,8 +193,6 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_ITEM_TYPE = 'vod'
|
||||
_ITEM_SHORTCUT = 'v'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
|
||||
@@ -181,7 +202,7 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
'title': 'LCK Summer Split - Week 6 Day 1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 17208,
|
||||
'timestamp': 1435131709,
|
||||
'timestamp': 1435131734,
|
||||
'upload_date': '20150624',
|
||||
'uploader': 'Riot Games',
|
||||
'uploader_id': 'riotgames',
|
||||
@@ -230,10 +251,20 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
}]
|
||||
|
||||
def _download_info(self, item_id):
|
||||
return self._extract_info(
|
||||
self._call_api(
|
||||
'kraken/videos/%s' % item_id, item_id,
|
||||
'Downloading video info JSON'))
|
||||
data = self._download_gql(
|
||||
item_id, [{
|
||||
'operationName': 'VideoMetadata',
|
||||
'variables': {
|
||||
'channelLogin': '',
|
||||
'videoID': item_id,
|
||||
},
|
||||
}],
|
||||
'Downloading stream metadata GraphQL')[0]['data']
|
||||
video = data.get('video')
|
||||
if video is None:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % item_id, expected=True)
|
||||
return self._extract_info_gql(video, item_id)
|
||||
|
||||
@staticmethod
|
||||
def _extract_info(info):
|
||||
@@ -272,13 +303,33 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_info_gql(info, item_id):
|
||||
vod_id = info.get('id') or item_id
|
||||
# id backward compatibility for download archives
|
||||
if vod_id[0] != 'v':
|
||||
vod_id = 'v%s' % vod_id
|
||||
thumbnail = url_or_none(info.get('previewThumbnailURL'))
|
||||
if thumbnail:
|
||||
for p in ('width', 'height'):
|
||||
thumbnail = thumbnail.replace('{%s}' % p, '0')
|
||||
return {
|
||||
'id': vod_id,
|
||||
'title': info.get('title') or 'Untitled Broadcast',
|
||||
'description': info.get('description'),
|
||||
'duration': int_or_none(info.get('lengthSeconds')),
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
|
||||
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
|
||||
'timestamp': unified_timestamp(info.get('publishedAt')),
|
||||
'view_count': int_or_none(info.get('viewCount')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
vod_id = self._match_id(url)
|
||||
|
||||
info = self._download_info(vod_id)
|
||||
access_token = self._call_api(
|
||||
'api/vods/%s/access_token' % vod_id, vod_id,
|
||||
'Downloading %s access token' % self._ITEM_TYPE)
|
||||
access_token = self._download_access_token(vod_id, 'video', 'id')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/vod/%s.m3u8?%s' % (
|
||||
@@ -289,8 +340,8 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
'allow_spectre': 'true',
|
||||
'player': 'twitchweb',
|
||||
'playlist_include_framerate': 'true',
|
||||
'nauth': access_token['token'],
|
||||
'nauthsig': access_token['sig'],
|
||||
'nauth': access_token['value'],
|
||||
'nauthsig': access_token['signature'],
|
||||
})),
|
||||
vod_id, 'mp4', entry_protocol='m3u8_native')
|
||||
|
||||
@@ -333,37 +384,7 @@ def _make_video_result(node):
|
||||
}
|
||||
|
||||
|
||||
class TwitchGraphQLBaseIE(TwitchBaseIE):
|
||||
_PAGE_LIMIT = 100
|
||||
|
||||
_OPERATION_HASHES = {
|
||||
'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
|
||||
'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
|
||||
'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
|
||||
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
|
||||
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
|
||||
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
|
||||
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
|
||||
}
|
||||
|
||||
def _download_gql(self, video_id, ops, note, fatal=True):
|
||||
for op in ops:
|
||||
op['extensions'] = {
|
||||
'persistedQuery': {
|
||||
'version': 1,
|
||||
'sha256Hash': self._OPERATION_HASHES[op['operationName']],
|
||||
}
|
||||
}
|
||||
return self._download_json(
|
||||
'https://gql.twitch.tv/gql', video_id, note,
|
||||
data=json.dumps(ops).encode(),
|
||||
headers={
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
}, fatal=fatal)
|
||||
|
||||
|
||||
class TwitchCollectionIE(TwitchGraphQLBaseIE):
|
||||
class TwitchCollectionIE(TwitchBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -400,7 +421,9 @@ class TwitchCollectionIE(TwitchGraphQLBaseIE):
|
||||
entries, playlist_id=collection_id, playlist_title=title)
|
||||
|
||||
|
||||
class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
|
||||
class TwitchPlaylistBaseIE(TwitchBaseIE):
|
||||
_PAGE_LIMIT = 100
|
||||
|
||||
def _entries(self, channel_name, *args):
|
||||
cursor = None
|
||||
variables_common = self._make_variables(channel_name, *args)
|
||||
@@ -440,49 +463,6 @@ class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
|
||||
if not cursor or not isinstance(cursor, compat_str):
|
||||
break
|
||||
|
||||
# Deprecated kraken v5 API
|
||||
def _entries_kraken(self, channel_name, broadcast_type, sort):
|
||||
access_token = self._download_access_token(channel_name)
|
||||
channel_id = self._extract_channel_id(access_token['token'], channel_name)
|
||||
offset = 0
|
||||
counter_override = None
|
||||
for counter in itertools.count(1):
|
||||
response = self._call_api(
|
||||
'kraken/channels/%s/videos/' % channel_id,
|
||||
channel_id,
|
||||
'Downloading video JSON page %s' % (counter_override or counter),
|
||||
query={
|
||||
'offset': offset,
|
||||
'limit': self._PAGE_LIMIT,
|
||||
'broadcast_type': broadcast_type,
|
||||
'sort': sort,
|
||||
})
|
||||
videos = response.get('videos')
|
||||
if not isinstance(videos, list):
|
||||
break
|
||||
for video in videos:
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': TwitchVodIE.ie_key(),
|
||||
'id': video.get('_id'),
|
||||
'url': video_url,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'timestamp': unified_timestamp(video.get('published_at')),
|
||||
'duration': float_or_none(video.get('length')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'language': video.get('language'),
|
||||
}
|
||||
offset += self._PAGE_LIMIT
|
||||
total = int_or_none(response.get('_total'))
|
||||
if total and offset >= total:
|
||||
break
|
||||
|
||||
|
||||
class TwitchVideosIE(TwitchPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
|
||||
@@ -724,7 +704,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
|
||||
playlist_title='%s - Collections' % channel_name)
|
||||
|
||||
|
||||
class TwitchStreamIE(TwitchGraphQLBaseIE):
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -814,8 +794,9 @@ class TwitchStreamIE(TwitchGraphQLBaseIE):
|
||||
if not stream:
|
||||
raise ExtractorError('%s is offline' % channel_name, expected=True)
|
||||
|
||||
access_token = self._download_access_token(channel_name)
|
||||
token = access_token['token']
|
||||
access_token = self._download_access_token(
|
||||
channel_name, 'stream', 'channelName')
|
||||
token = access_token['value']
|
||||
|
||||
stream_id = stream.get('id') or channel_name
|
||||
query = {
|
||||
@@ -826,7 +807,7 @@ class TwitchStreamIE(TwitchGraphQLBaseIE):
|
||||
'player': 'twitchweb',
|
||||
'playlist_include_framerate': 'true',
|
||||
'segment_preference': '4',
|
||||
'sig': access_token['sig'].encode('utf-8'),
|
||||
'sig': access_token['signature'].encode('utf-8'),
|
||||
'token': token.encode('utf-8'),
|
||||
}
|
||||
formats = self._extract_m3u8_formats(
|
||||
@@ -912,8 +893,8 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
clip = self._download_json(
|
||||
'https://gql.twitch.tv/gql', video_id, data=json.dumps({
|
||||
clip = self._download_base_gql(
|
||||
video_id, {
|
||||
'query': '''{
|
||||
clip(slug: "%s") {
|
||||
broadcaster {
|
||||
@@ -937,10 +918,7 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
}
|
||||
viewCount
|
||||
}
|
||||
}''' % video_id,
|
||||
}).encode(), headers={
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
})['data']['clip']
|
||||
}''' % video_id}, 'Downloading clip GraphQL')['data']['clip']
|
||||
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
|
@@ -373,6 +373,24 @@ class TwitterIE(TwitterBaseIE):
|
||||
'uploader_id': '1eVjYOLGkGrQL',
|
||||
},
|
||||
'add_ie': ['TwitterBroadcast'],
|
||||
}, {
|
||||
# unified card
|
||||
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
||||
'info_dict': {
|
||||
'id': '1349794411333394432',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
|
||||
'uploader': 'Brooklyn Nets',
|
||||
'uploader_id': 'BrooklynNets',
|
||||
'duration': 324.484,
|
||||
'timestamp': 1610651040,
|
||||
'upload_date': '20210114',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Twitch Clip Embed
|
||||
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
||||
@@ -389,6 +407,22 @@ class TwitterIE(TwitterBaseIE):
|
||||
# appplayer card
|
||||
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video_direct_message card
|
||||
'url': 'https://twitter.com/qarev001/status/1348948114569269251',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# poll2choice_video card
|
||||
'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# poll3choice_video card
|
||||
'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# poll4choice_video card
|
||||
'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -433,8 +467,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
||||
if media and media.get('type') != 'photo':
|
||||
def extract_from_video_info(media):
|
||||
video_info = media.get('video_info') or {}
|
||||
|
||||
formats = []
|
||||
@@ -461,6 +494,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
||||
})
|
||||
|
||||
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
||||
if media and media.get('type') != 'photo':
|
||||
extract_from_video_info(media)
|
||||
else:
|
||||
card = status.get('card')
|
||||
if card:
|
||||
@@ -493,7 +530,12 @@ class TwitterIE(TwitterBaseIE):
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('card_url'),
|
||||
})
|
||||
# amplify, promo_video_website, promo_video_convo, appplayer, ...
|
||||
elif card_name == 'unified_card':
|
||||
media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
|
||||
extract_from_video_info(next(iter(media_entities.values())))
|
||||
# amplify, promo_video_website, promo_video_convo, appplayer,
|
||||
# video_direct_message, poll2choice_video, poll3choice_video,
|
||||
# poll4choice_video, ...
|
||||
else:
|
||||
is_amplify = card_name == 'amplify'
|
||||
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
|
||||
|
@@ -1,12 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
@@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
_FORMATS = (
|
||||
(200, 416, 234),
|
||||
(400, 480, 270),
|
||||
(600, 640, 360),
|
||||
(1200, 640, 360),
|
||||
(1800, 960, 540),
|
||||
(2500, 1280, 720),
|
||||
)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
|
||||
@@ -97,46 +85,20 @@ class WatIE(InfoExtractor):
|
||||
return red_url
|
||||
return None
|
||||
|
||||
def remove_bitrate_limit(manifest_url):
|
||||
return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
|
||||
|
||||
formats = []
|
||||
try:
|
||||
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||
m3u8_url = manifest_urls.get('hls')
|
||||
if m3u8_url:
|
||||
m3u8_url = remove_bitrate_limit(m3u8_url)
|
||||
for m3u8_alt_url in alt_urls(m3u8_url):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_alt_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
mpd_url = manifest_urls.get('mpd')
|
||||
if mpd_url:
|
||||
mpd_url = remove_bitrate_limit(mpd_url)
|
||||
for mpd_alt_url in alt_urls(mpd_url):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
except ExtractorError:
|
||||
abr = 64
|
||||
for vbr, width, height in self._FORMATS:
|
||||
tbr = vbr + abr
|
||||
format_id = 'http-%s' % tbr
|
||||
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
|
||||
if self._is_valid_url(fmt_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': fmt_url,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
manifest_urls = self._download_json(
|
||||
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
|
||||
m3u8_url = manifest_urls.get('hls')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
mpd_url = manifest_urls.get('mpd')
|
||||
if mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
|
||||
upload_date = unified_strdate(date_diffusion) if date_diffusion else None
|
||||
|
@@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, country, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not country:
|
||||
country = 'us'
|
||||
else:
|
||||
country = country.split('-')[0]
|
||||
api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
|
||||
|
||||
for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
|
||||
content = self._download_json(
|
||||
api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
|
||||
display_id, 'Downloading content JSON metadata', fatal=i == 1)
|
||||
if content:
|
||||
item = content['items'][0]
|
||||
break
|
||||
|
||||
if item.get('type') != 'video':
|
||||
entries = []
|
||||
|
||||
cover = item.get('cover') or {}
|
||||
if cover.get('type') == 'yvideo':
|
||||
cover_url = cover.get('url')
|
||||
if cover_url:
|
||||
entries.append(self.url_result(
|
||||
cover_url, 'Yahoo', cover.get('uuid')))
|
||||
|
||||
for e in item.get('body', []):
|
||||
if e.get('type') == 'videoIframe':
|
||||
iframe_url = e.get('url')
|
||||
if not iframe_url:
|
||||
continue
|
||||
entries.append(self.url_result(iframe_url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, item.get('uuid'),
|
||||
item.get('title'), item.get('summary'))
|
||||
|
||||
video_id = item['uuid']
|
||||
def _extract_yahoo_video(self, video_id, country):
|
||||
video = self._download_json(
|
||||
api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
|
||||
'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
|
||||
video_id, 'Downloading video JSON metadata')[0]
|
||||
title = video['title']
|
||||
|
||||
@@ -298,7 +261,6 @@ class YahooIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'formats': formats,
|
||||
'display_id': display_id,
|
||||
'thumbnails': thumbnails,
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': parse_iso8601(video.get('publish_time')),
|
||||
@@ -311,6 +273,44 @@ class YahooIE(InfoExtractor):
|
||||
'episode_number': int_or_none(series_info.get('episode_number')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, country, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not country:
|
||||
country = 'us'
|
||||
else:
|
||||
country = country.split('-')[0]
|
||||
|
||||
item = self._download_json(
|
||||
'https://%s.yahoo.com/caas/content/article' % country, display_id,
|
||||
'Downloading content JSON metadata', query={
|
||||
'url': url
|
||||
})['items'][0]['data']['partnerData']
|
||||
|
||||
if item.get('type') != 'video':
|
||||
entries = []
|
||||
|
||||
cover = item.get('cover') or {}
|
||||
if cover.get('type') == 'yvideo':
|
||||
cover_url = cover.get('url')
|
||||
if cover_url:
|
||||
entries.append(self.url_result(
|
||||
cover_url, 'Yahoo', cover.get('uuid')))
|
||||
|
||||
for e in (item.get('body') or []):
|
||||
if e.get('type') == 'videoIframe':
|
||||
iframe_url = e.get('url')
|
||||
if not iframe_url:
|
||||
continue
|
||||
entries.append(self.url_result(iframe_url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, item.get('uuid'),
|
||||
item.get('title'), item.get('summary'))
|
||||
|
||||
info = self._extract_yahoo_video(item['uuid'], country)
|
||||
info['display_id'] = display_id
|
||||
return info
|
||||
|
||||
|
||||
class YahooSearchIE(SearchInfoExtractor):
|
||||
IE_DESC = 'Yahoo screen search'
|
||||
|
@@ -60,6 +60,9 @@ class YouPornIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.youporn.com/watch/505835',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -88,7 +91,7 @@ class YouPornIE(InfoExtractor):
|
||||
# Main source
|
||||
definitions = self._parse_json(
|
||||
self._search_regex(
|
||||
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
|
||||
r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
|
||||
'media definitions', default='[]'),
|
||||
video_id, fatal=False)
|
||||
if definitions:
|
||||
@@ -100,7 +103,7 @@ class YouPornIE(InfoExtractor):
|
||||
links.append(video_url)
|
||||
|
||||
# Fallback #1, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2 (unavailable as at 22.06.2017)
|
||||
@@ -128,8 +131,9 @@ class YouPornIE(InfoExtractor):
|
||||
# Video URL's path looks like this:
|
||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||
# We will benefit from it by extracting some metadata
|
||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
|
||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
|
@@ -308,6 +308,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), video_id, fatal=False)
|
||||
|
||||
def _extract_video(self, renderer):
|
||||
video_id = renderer['videoId']
|
||||
title = try_get(
|
||||
renderer,
|
||||
(lambda x: x['title']['runs'][0]['text'],
|
||||
lambda x: x['title']['simpleText']), compat_str)
|
||||
description = try_get(
|
||||
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
||||
compat_str)
|
||||
duration = parse_duration(try_get(
|
||||
renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||
view_count_text = try_get(
|
||||
renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(
|
||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
@@ -2765,36 +2795,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
if renderer:
|
||||
return renderer
|
||||
|
||||
def _extract_video(self, renderer):
|
||||
video_id = renderer.get('videoId')
|
||||
title = try_get(
|
||||
renderer,
|
||||
(lambda x: x['title']['runs'][0]['text'],
|
||||
lambda x: x['title']['simpleText']), compat_str)
|
||||
description = try_get(
|
||||
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
||||
compat_str)
|
||||
duration = parse_duration(try_get(
|
||||
renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||
view_count_text = try_get(
|
||||
renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(
|
||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
def _grid_entries(self, grid_renderer):
|
||||
for item in grid_renderer['items']:
|
||||
if not isinstance(item, dict):
|
||||
@@ -3417,46 +3417,29 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
|
||||
list)
|
||||
if not slr_contents:
|
||||
break
|
||||
isr_contents = try_get(
|
||||
slr_contents,
|
||||
lambda x: x[0]['itemSectionRenderer']['contents'],
|
||||
list)
|
||||
if not isr_contents:
|
||||
break
|
||||
for content in isr_contents:
|
||||
if not isinstance(content, dict):
|
||||
for slr_content in slr_contents:
|
||||
isr_contents = try_get(
|
||||
slr_content,
|
||||
lambda x: x['itemSectionRenderer']['contents'],
|
||||
list)
|
||||
if not isr_contents:
|
||||
continue
|
||||
video = content.get('videoRenderer')
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_id = video.get('videoId')
|
||||
if not video_id:
|
||||
continue
|
||||
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
|
||||
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
|
||||
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'^(\d+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
total += 1
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': uploader,
|
||||
}
|
||||
if total == n:
|
||||
return
|
||||
for content in isr_contents:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
video = content.get('videoRenderer')
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_id = video.get('videoId')
|
||||
if not video_id:
|
||||
continue
|
||||
yield self._extract_video(video)
|
||||
total += 1
|
||||
if total == n:
|
||||
return
|
||||
token = try_get(
|
||||
slr_contents,
|
||||
lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||
lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||
compat_str)
|
||||
if not token:
|
||||
break
|
||||
|
@@ -689,6 +689,10 @@ def parseOpts(overrideArguments=None):
|
||||
'-o', '--output',
|
||||
dest='outtmpl', metavar='TEMPLATE',
|
||||
help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
|
||||
filesystem.add_option(
|
||||
'--output-na-placeholder',
|
||||
dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
|
||||
help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
|
||||
filesystem.add_option(
|
||||
'--autonumber-size',
|
||||
dest='autonumber_size', metavar='NUMBER', type=int,
|
||||
@@ -782,7 +786,7 @@ def parseOpts(overrideArguments=None):
|
||||
postproc.add_option(
|
||||
'-x', '--extract-audio',
|
||||
action='store_true', dest='extractaudio', default=False,
|
||||
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
|
||||
help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
|
||||
postproc.add_option(
|
||||
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
|
||||
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.01.03'
|
||||
__version__ = '2021.01.24.1'
|
||||
|
Reference in New Issue
Block a user