mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 22:28:37 +09:00
Compare commits
165 Commits
2020.12.22
...
f9201cef58
Author | SHA1 | Date | |
---|---|---|---|
![]() |
f9201cef58 | ||
![]() |
26499ba823 | ||
![]() |
58f6c2112d | ||
![]() |
de026a6acd | ||
![]() |
d4564afc70 | ||
![]() |
360a5e0f60 | ||
![]() |
55a3ca16d3 | ||
![]() |
ef50cb3fda | ||
![]() |
8673f4344c | ||
![]() |
f1487d4fca | ||
![]() |
0cd4c402f0 | ||
![]() |
9c9b458145 | ||
![]() |
9d50f86232 | ||
![]() |
7e92f9015e | ||
![]() |
aa860b8016 | ||
![]() |
b484097b01 | ||
![]() |
ab9001dab5 | ||
![]() |
879866a230 | ||
![]() |
8e5477d036 | ||
![]() |
1e8e5d5238 | ||
![]() |
d81a213cfb | ||
![]() |
7c2d18a13f | ||
![]() |
2408e6d26a | ||
![]() |
cf862771d7 | ||
![]() |
a938f111ed | ||
![]() |
4759543f6e | ||
![]() |
d0fc289f45 | ||
![]() |
70f572585d | ||
![]() |
c2d06aef60 | ||
![]() |
ff1e765400 | ||
![]() |
170e1c1995 | ||
![]() |
61e669acff | ||
![]() |
2c337f4e85 | ||
![]() |
bf6a74c620 | ||
![]() |
38a967c98e | ||
![]() |
3a61e6d360 | ||
![]() |
3d8e32dcc0 | ||
![]() |
8f29b2dd38 | ||
![]() |
a29e340efa | ||
![]() |
b13f29098f | ||
![]() |
430c4bc9d0 | ||
![]() |
4ae243fc6c | ||
![]() |
8f20ad36dc | ||
![]() |
799c794947 | ||
![]() |
1ae7ae0b96 | ||
![]() |
ccc7112291 | ||
![]() |
5b24f8f505 | ||
![]() |
fcd90d2583 | ||
![]() |
8f757c7353 | ||
![]() |
be1a3f2d11 | ||
![]() |
ecae54a98d | ||
![]() |
f318882955 | ||
![]() |
c3399cac19 | ||
![]() |
9237aaa77f | ||
![]() |
766fcdd0fa | ||
![]() |
f6ea29e24b | ||
![]() |
8a3797a4ab | ||
![]() |
745db8899d | ||
![]() |
83db801cbf | ||
![]() |
964a8eb754 | ||
![]() |
ac61f2e058 | ||
![]() |
8487e8b98a | ||
![]() |
9c484c0019 | ||
![]() |
0e96b4b5ce | ||
![]() |
a563c97c5c | ||
![]() |
e88c9ef62a | ||
![]() |
0889eb33e0 | ||
![]() |
0021a2b9a1 | ||
![]() |
19ec468635 | ||
![]() |
491ee7efe4 | ||
![]() |
8522bcd97c | ||
![]() |
ac71fd5919 | ||
![]() |
8e953dcbb1 | ||
![]() |
f4afb9a6a8 | ||
![]() |
d5b8cf093c | ||
![]() |
5c6e84c0ff | ||
![]() |
1aaee908b9 | ||
![]() |
b2d9fd9c9f | ||
![]() |
bc2f83b95e | ||
![]() |
85de33b04e | ||
![]() |
7dfd966848 | ||
![]() |
a25d03d7cb | ||
![]() |
cabfd4b1f0 | ||
![]() |
7b643d4cd0 | ||
![]() |
1f1d01d498 | ||
![]() |
21a42e2588 | ||
![]() |
2df93a0c4a | ||
![]() |
75972e200d | ||
![]() |
d0d838638c | ||
![]() |
8c17afc471 | ||
![]() |
40d66e07df | ||
![]() |
ab89a8678b | ||
![]() |
4d7d056909 | ||
![]() |
c35bc82606 | ||
![]() |
2f56caf083 | ||
![]() |
4066945919 | ||
![]() |
2a84694b1e | ||
![]() |
4046ffe1e1 | ||
![]() |
d1d0612160 | ||
![]() |
7b0f04ed1f | ||
![]() |
2e21b06ea2 | ||
![]() |
a6f75e6e89 | ||
![]() |
bd18824c2a | ||
![]() |
bdd044e67b | ||
![]() |
f7e95fb2a0 | ||
![]() |
9dd674e1d2 | ||
![]() |
9c1e164e0c | ||
![]() |
c706fbe9fe | ||
![]() |
ebdcf70b0d | ||
![]() |
5966095e65 | ||
![]() |
9ee984fc76 | ||
![]() |
53528e1d23 | ||
![]() |
c931c4b8dd | ||
![]() |
7acd042bbb | ||
![]() |
bcfe485e01 | ||
![]() |
479cc6d5a1 | ||
![]() |
38286ee729 | ||
![]() |
1a95953867 | ||
![]() |
71febd1c52 | ||
![]() |
f1bc56c99b | ||
![]() |
64e419bd73 | ||
![]() |
782ea947b4 | ||
![]() |
f27224d57b | ||
![]() |
c007188598 | ||
![]() |
af93ecfd88 | ||
![]() |
794771a164 | ||
![]() |
6f2eaaf73d | ||
![]() |
4c7a4dbc4d | ||
![]() |
f86b299d0e | ||
![]() |
e474996541 | ||
![]() |
aed617e311 | ||
![]() |
0fa67c1d68 | ||
![]() |
365b3cc72d | ||
![]() |
a272fe21a8 | ||
![]() |
cec1c2f211 | ||
![]() |
12053450dc | ||
![]() |
46cffb0c47 | ||
![]() |
c32a059f52 | ||
![]() |
6911312e53 | ||
![]() |
f22b5a6b96 | ||
![]() |
58e55198c1 | ||
![]() |
d61ed9f2f1 | ||
![]() |
8bc4c6350e | ||
![]() |
cfa4ffa23b | ||
![]() |
4f1dc1463d | ||
![]() |
17e0f41d34 | ||
![]() |
b57b27ff8f | ||
![]() |
bbe8cc6662 | ||
![]() |
98106accb6 | ||
![]() |
af1312bfc3 | ||
![]() |
4c7d7215cd | ||
![]() |
0370d9eb3d | ||
![]() |
1434651d20 | ||
![]() |
2c312ab84a | ||
![]() |
0ee78d62d5 | ||
![]() |
7f3c90ab25 | ||
![]() |
1d3cd29730 | ||
![]() |
4ef1fc9707 | ||
![]() |
f9e6aa1dcf | ||
![]() |
f83db9064b | ||
![]() |
2da9a86399 | ||
![]() |
ecaa535cf4 | ||
![]() |
79dd92b1fe | ||
![]() |
bd3844c9c2 | ||
![]() |
7bf5e3a84a |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.22**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.12.22
|
||||
[debug] youtube-dl version 2021.01.16
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.22**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.22**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.22**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.12.22
|
||||
[debug] youtube-dl version 2021.01.16
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.16. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.22**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.16**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
30
.github/workflows/ci.yml
vendored
30
.github/workflows/ci.yml
vendored
@@ -1,5 +1,5 @@
|
||||
name: CI
|
||||
on: [push]
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
tests:
|
||||
name: Tests
|
||||
@@ -7,31 +7,55 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
os: [ubuntu-18.04]
|
||||
# TODO: python 2.6
|
||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
||||
python-impl: [cpython]
|
||||
ytdl-test-set: [core, download]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# python 3.2 is only available on windows via setup-python
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
python-impl: cpython
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: bat
|
||||
# jython
|
||||
- os: ubuntu-18.04
|
||||
python-impl: jython
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: sh
|
||||
- os: ubuntu-18.04
|
||||
python-impl: jython
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: sh
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
if: ${{ matrix.python-impl == 'cpython' }}
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Set up Java 8
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
uses: actions/setup-java@v1
|
||||
with:
|
||||
java-version: 8
|
||||
- name: Install Jython
|
||||
if: ${{ matrix.python-impl == 'jython' }}
|
||||
run: |
|
||||
wget http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar -O jython-installer.jar
|
||||
java -jar jython-installer.jar -s -d "$HOME/jython"
|
||||
echo "$HOME/jython/bin" >> $GITHUB_PATH
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Run tests
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' }}
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' || matrix.python-impl == 'jython' }}
|
||||
env:
|
||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
|
50
.travis.yml
50
.travis.yml
@@ -1,50 +0,0 @@
|
||||
language: python
|
||||
python:
|
||||
- "2.6"
|
||||
- "2.7"
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
# - YTDL_TEST_SET=download
|
||||
jobs:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
# - python: 3.7
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- python: 3.8
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
# - python: 3.8
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
# - python: 3.8-dev
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||
- name: flake8
|
||||
python: 3.8
|
||||
dist: xenial
|
||||
install: pip install flake8
|
||||
script: flake8 .
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
# - env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
script: ./devscripts/run_tests.sh
|
163
ChangeLog
163
ChangeLog
@@ -1,3 +1,166 @@
|
||||
version 2021.01.16
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Protect from infinite recursion due to recursively nested
|
||||
playlists (#27833)
|
||||
* [YoutubeDL] Ignore failure to create existing directory (#27811)
|
||||
* [YoutubeDL] Raise syntax error for format selection expressions with multiple
|
||||
+ operators (#27803)
|
||||
|
||||
Extractors
|
||||
+ [animeondemand] Add support for lazy playlist extraction (#27829)
|
||||
* [youporn] Restrict fallback download URL (#27822)
|
||||
* [youporn] Improve height and tbr extraction (#20425, #23659)
|
||||
* [youporn] Fix extraction (#27822)
|
||||
+ [twitter] Add support for unified cards (#27826)
|
||||
+ [twitch] Add Authorization header with OAuth token for GraphQL requests
|
||||
(#27790)
|
||||
* [mixcloud:playlist:base] Extract video id in flat playlist mode (#27787)
|
||||
* [cspan] Improve info extraction (#27791)
|
||||
* [adn] Improve info extraction
|
||||
* [adn] Fix extraction (#26963, #27732)
|
||||
* [youtube:search] Extract from all sections (#27604)
|
||||
* [youtube:search] fix viewcount and try to extract all video sections (#27604)
|
||||
* [twitch] Improve login error extraction
|
||||
* [twitch] Fix authentication (#27743)
|
||||
* [3qsdn] Improve extraction (#21058)
|
||||
* [peertube] Extract formats from streamingPlaylists (#26002, #27586, #27728)
|
||||
* [khanacademy] Fix extraction (#2887, #26803)
|
||||
* [spike] Update Paramount Network feed URL (#27715)
|
||||
|
||||
|
||||
version 2021.01.08
|
||||
|
||||
Core
|
||||
* [downloader/hls] Disable decryption in tests (#27660)
|
||||
+ [utils] Add a function to clean podcast URLs
|
||||
|
||||
Extractors
|
||||
* [rai] Improve subtitles extraction (#27698, #27705)
|
||||
* [canvas] Match only supported VRT NU URLs (#27707)
|
||||
+ [bibeltv] Add support for bibeltv.de (#14361)
|
||||
+ [bfmtv] Add support for bfmtv.com (#16053, #26615)
|
||||
+ [sbs] Add support for ondemand play and news embed URLs (#17650, #27629)
|
||||
* [twitch] Drop legacy kraken API v5 code altogether and refactor
|
||||
* [twitch:vod] Switch to GraphQL for video metadata
|
||||
* [canvas] Fix VRT NU extraction (#26957, #27053)
|
||||
* [twitch] Switch access token to GraphQL and refactor (#27646)
|
||||
+ [rai] Detect ContentItem in iframe (#12652, #27673)
|
||||
* [ketnet] Fix extraction (#27662)
|
||||
+ [dplay] Add support for Discovery+ domains (#27680)
|
||||
* [motherless] Improve extraction (#26495, #27450)
|
||||
* [motherless] Fix recent videos upload date extraction (#27661)
|
||||
* [nrk] Fix extraction for videos without a legalAge rating
|
||||
- [googleplus] Remove extractor (#4955, #7400)
|
||||
+ [applepodcasts] Add support for podcasts.apple.com (#25918)
|
||||
+ [googlepodcasts] Add support for podcasts.google.com
|
||||
+ [iheart] Add support for iheart.com (#27037)
|
||||
* [acast] Clean podcast URLs
|
||||
* [stitcher] Clean podcast URLs
|
||||
+ [xfileshare] Add support for aparat.cam (#27651)
|
||||
+ [twitter] Add support for summary card (#25121)
|
||||
* [twitter] Try to use a Generic fallback for unknown twitter cards (#25982)
|
||||
+ [stitcher] Add support for shows and show metadata extraction (#20510)
|
||||
* [stv] Improve episode id extraction (#23083)
|
||||
|
||||
|
||||
version 2021.01.03
|
||||
|
||||
Extractors
|
||||
* [nrk] Improve series metadata extraction (#27473)
|
||||
+ [nrk] Extract subtitles
|
||||
* [nrk] Fix age limit extraction
|
||||
* [nrk] Improve video id extraction
|
||||
+ [nrk] Add support for podcasts (#27634, #27635)
|
||||
* [nrk] Generalize and delegate all item extractors to nrk
|
||||
+ [nrk] Add support for mp3 formats
|
||||
* [nrktv] Switch to playback endpoint
|
||||
* [vvvvid] Fix season metadata extraction (#18130)
|
||||
* [stitcher] Fix extraction (#20811, #27606)
|
||||
* [acast] Fix extraction (#21444, #27612, #27613)
|
||||
+ [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200)
|
||||
+ [sky] Add support for Sports News articles and Brightcove videos (#13054)
|
||||
+ [vvvvid] Extract akamai formats
|
||||
* [vvvvid] Skip unplayable episodes (#27599)
|
||||
* [yandexvideo] Fix extraction for Python 3.4
|
||||
|
||||
|
||||
version 2020.12.31
|
||||
|
||||
Core
|
||||
* [utils] Accept only supported protocols in url_or_none
|
||||
* [YoutubeDL] Allow format filtering using audio language (#16209)
|
||||
|
||||
Extractors
|
||||
+ [redditr] Extract all thumbnails (#27503)
|
||||
* [vvvvid] Improve info extraction
|
||||
+ [vvvvid] Add support for playlists (#18130, #27574)
|
||||
+ [yandexdisk] Extract info from webpage
|
||||
* [yandexdisk] Fix extraction (#17861, #27131)
|
||||
* [yandexvideo] Use old API call as fallback
|
||||
* [yandexvideo] Fix extraction (#25000)
|
||||
- [nbc] Remove CSNNE extractor
|
||||
* [nbc] Fix NBCSport VPlayer URL extraction (#16640)
|
||||
+ [aenetworks] Add support for biography.com (#3863)
|
||||
* [uktvplay] Match new video URLs (#17909)
|
||||
* [sevenplay] Detect API errors
|
||||
* [tenplay] Fix format extraction (#26653)
|
||||
* [brightcove] Raise error for DRM protected videos (#23467, #27568)
|
||||
|
||||
|
||||
version 2020.12.29
|
||||
|
||||
Extractors
|
||||
* [youtube] Improve yt initial data extraction (#27524)
|
||||
* [youtube:tab] Improve URL matching (#27559)
|
||||
* [youtube:tab] Restore retry on browse requests (#27313, #27564)
|
||||
* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904,
|
||||
#25418, #26070, #26350, #26738, #27563)
|
||||
- [brightcove] Remove sonyliv specific code
|
||||
* [piksel] Improve format extraction
|
||||
+ [zype] Add support for uplynk videos
|
||||
+ [toggle] Add support for live.mewatch.sg (#27555)
|
||||
+ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826)
|
||||
* [teachable] Improve embed detection (#26923)
|
||||
* [mitele] Fix free video extraction (#24624, #25827, #26757)
|
||||
* [telecinco] Fix extraction
|
||||
* [youtube] Update invidious.snopyta.org (#22667)
|
||||
* [amcnetworks] Improve auth only video detection (#27548)
|
||||
+ [generic] Add support for VHX Embeds (#27546)
|
||||
|
||||
|
||||
version 2020.12.26
|
||||
|
||||
Extractors
|
||||
* [instagram] Fix comment count extraction
|
||||
+ [instagram] Add support for reel URLs (#26234, #26250)
|
||||
* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821,
|
||||
#27538)
|
||||
* [instagram] Improve thumbnail extraction
|
||||
* [instagram] Fix extraction when authenticated (#22880, #26377, #26981,
|
||||
#27422)
|
||||
* [spankbang:playlist] Fix extraction (#24087)
|
||||
+ [spankbang] Add support for playlist videos
|
||||
* [pornhub] Improve like and dislike count extraction (#27356)
|
||||
* [pornhub] Fix lq formats extraction (#27386, #27393)
|
||||
+ [bongacams] Add support for bongacams.com (#27440)
|
||||
* [youtube:tab] Extend URL regular expression (#27501)
|
||||
* [theweatherchannel] Fix extraction (#25930, #26051)
|
||||
+ [sprout] Add support for Universal Kids (#22518)
|
||||
* [theplatform] Allow passing geo bypass countries from other extractors
|
||||
+ [wistia] Add support for playlists (#27533)
|
||||
+ [ctv] Add support for ctv.ca (#27525)
|
||||
* [9c9media] Improve info extraction
|
||||
* [youtube] Fix automatic captions extraction (#27162, #27388)
|
||||
* [sonyliv] Fix title for movies
|
||||
* [sonyliv] Fix extraction (#25667)
|
||||
* [streetvoice] Fix extraction (#27455, #27492)
|
||||
+ [facebook] Add support for watchparty pages (#27507)
|
||||
* [cbslocal] Fix video extraction
|
||||
+ [brightcove] Add another method to extract policyKey
|
||||
* [mewatch] Relax URL regular expression (#27506)
|
||||
|
||||
|
||||
version 2020.12.22
|
||||
|
||||
Core
|
||||
|
@@ -678,6 +678,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
- `language`: Language code
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
@@ -880,7 +881,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
|
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar
|
||||
java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython"
|
||||
$HOME/jython/bin/jython -m pip install nose
|
@@ -54,9 +54,11 @@
|
||||
- **Aparat**
|
||||
- **AppleConnect**
|
||||
- **AppleDaily**: 臺灣蘋果日報
|
||||
- **ApplePodcasts**
|
||||
- **appletrailers**
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ArcPublishing**
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
@@ -97,6 +99,10 @@
|
||||
- **BellMedia**
|
||||
- **Bet**
|
||||
- **bfi:player**
|
||||
- **bfmtv**
|
||||
- **bfmtv:article**
|
||||
- **bfmtv:live**
|
||||
- **BibelTV**
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili**
|
||||
@@ -104,6 +110,7 @@
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BiliBiliPlayer**
|
||||
- **BioBioChileTV**
|
||||
- **Biography**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
@@ -112,6 +119,7 @@
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BongaCams**
|
||||
- **BostonGlobe**
|
||||
- **Box**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
@@ -146,6 +154,7 @@
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSLocalArticle**
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
@@ -195,9 +204,9 @@
|
||||
- **CrooksAndLiars**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **cu.ntv.co.jp**: Nippon Television Network
|
||||
- **Culturebox**
|
||||
@@ -314,7 +323,6 @@
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameSpot**
|
||||
@@ -333,6 +341,8 @@
|
||||
- **Go**
|
||||
- **GodTube**
|
||||
- **Golem**
|
||||
- **google:podcasts**
|
||||
- **google:podcasts:feed**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
- **GPUTechConf**
|
||||
@@ -347,6 +357,7 @@
|
||||
- **hgtv.com:show**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
@@ -366,6 +377,8 @@
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **ign.com**
|
||||
- **IHeartRadio**
|
||||
- **iheartradio:podcast**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
@@ -405,7 +418,8 @@
|
||||
- **Katsomo**
|
||||
- **KeezMovies**
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **khanacademy**
|
||||
- **khanacademy:unit**
|
||||
- **KickStarter**
|
||||
- **KinjaEmbed**
|
||||
- **KinoPoisk**
|
||||
@@ -607,6 +621,7 @@
|
||||
- **Npr**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKRadioPodkast**
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
@@ -686,7 +701,6 @@
|
||||
- **Playwire**
|
||||
- **pluralsight**
|
||||
- **pluralsight:course**
|
||||
- **plus.google**: Google Plus
|
||||
- **podomatic**
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
@@ -810,12 +824,13 @@
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **sky.it**
|
||||
- **sky:news**
|
||||
- **sky:sports**
|
||||
- **sky:sports:news**
|
||||
- **skyacademy.it**
|
||||
- **SkylineWebcams**
|
||||
- **SkyNews**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **SlidesLive**
|
||||
- **Slutload**
|
||||
@@ -856,6 +871,7 @@
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **StitcherShow**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
@@ -1086,6 +1102,7 @@
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VVVVIDShow**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Wakanim**
|
||||
@@ -1108,6 +1125,7 @@
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **Wistia**
|
||||
- **WistiaPlaylist**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **WorldStarHipHop**
|
||||
- **WSJ**: Wall Street Journal
|
||||
@@ -1115,7 +1133,7 @@
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XHamsterUser**
|
||||
|
@@ -464,6 +464,7 @@ class TestFormatSelection(unittest.TestCase):
|
||||
assert_syntax_error('+bestaudio')
|
||||
assert_syntax_error('bestvideo+')
|
||||
assert_syntax_error('/')
|
||||
assert_syntax_error('bestvideo+bestvideo+bestaudio')
|
||||
|
||||
def test_format_filtering(self):
|
||||
formats = [
|
||||
|
@@ -36,7 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
@@ -57,8 +57,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
# def test_youtube_user_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||
|
@@ -258,16 +258,24 @@ class TestNRKSubtitles(BaseTestSubtitles):
|
||||
|
||||
|
||||
class TestRaiPlaySubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
IE = RaiPlayIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
def test_subtitles_key(self):
|
||||
self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||
self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a')
|
||||
|
||||
def test_subtitles_array_key(self):
|
||||
self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html'
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['it']))
|
||||
self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd')
|
||||
|
||||
|
||||
class TestVikiSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
|
||||
|
@@ -21,6 +21,7 @@ from youtube_dl.utils import (
|
||||
encode_base_n,
|
||||
caesar,
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
date_from_str,
|
||||
DateRange,
|
||||
detect_exe_version,
|
||||
@@ -554,6 +555,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
self.assertEqual(url_or_none('s3://foo.de'), None)
|
||||
self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
|
||||
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
||||
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
||||
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
@@ -1465,6 +1471,10 @@ Line 1
|
||||
self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
|
||||
self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
|
||||
|
||||
def test_clean_podcast_url(self):
|
||||
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -338,6 +338,8 @@ class YoutubeDL(object):
|
||||
_pps = []
|
||||
_download_retcode = None
|
||||
_num_downloads = None
|
||||
_playlist_level = 0
|
||||
_playlist_urls = set()
|
||||
_screen_file = None
|
||||
|
||||
def __init__(self, params=None, auto_init=True):
|
||||
@@ -906,115 +908,23 @@ class YoutubeDL(object):
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
elif result_type in ('playlist', 'multi_video'):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
# Protect from infinite recursion due to recursively nested playlists
|
||||
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||
webpage_url = ie_result['webpage_url']
|
||||
if webpage_url in self._playlist_urls:
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
'[download] Skipping already downloaded playlist: %s'
|
||||
% ie_result.get('title') or ie_result.get('id'))
|
||||
return
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
self._playlist_level += 1
|
||||
self._playlist_urls.add(webpage_url)
|
||||
try:
|
||||
return self.__process_playlist(ie_result, download)
|
||||
finally:
|
||||
self._playlist_level -= 1
|
||||
if not self._playlist_level:
|
||||
self._playlist_urls.clear()
|
||||
elif result_type == 'compat_list':
|
||||
self.report_warning(
|
||||
'Extractor %s returned a compat_list result. '
|
||||
@@ -1039,6 +949,118 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
def __process_playlist(self, ie_result, download):
|
||||
# We process each entry in the playlist
|
||||
playlist = ie_result.get('title') or ie_result.get('id')
|
||||
|
||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||
|
||||
playlist_results = []
|
||||
|
||||
playliststart = self.params.get('playliststart', 1) - 1
|
||||
playlistend = self.params.get('playlistend')
|
||||
# For backwards compatibility, interpret -1 as whole list
|
||||
if playlistend == -1:
|
||||
playlistend = None
|
||||
|
||||
playlistitems_str = self.params.get('playlist_items')
|
||||
playlistitems = None
|
||||
if playlistitems_str is not None:
|
||||
def iter_playlistitems(format):
|
||||
for string_segment in format.split(','):
|
||||
if '-' in string_segment:
|
||||
start, end = string_segment.split('-')
|
||||
for item in range(int(start), int(end) + 1):
|
||||
yield int(item)
|
||||
else:
|
||||
yield int(string_segment)
|
||||
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
|
||||
|
||||
ie_entries = ie_result['entries']
|
||||
|
||||
def make_playlistitems_entries(list_ie_entries):
|
||||
num_entries = len(list_ie_entries)
|
||||
return [
|
||||
list_ie_entries[i - 1] for i in playlistitems
|
||||
if -num_entries <= i - 1 < num_entries]
|
||||
|
||||
def report_download(num_entries):
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Downloading %d videos' %
|
||||
(ie_result['extractor'], playlist, num_entries))
|
||||
|
||||
if isinstance(ie_entries, list):
|
||||
n_all_entries = len(ie_entries)
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(ie_entries)
|
||||
else:
|
||||
entries = ie_entries[playliststart:playlistend]
|
||||
n_entries = len(entries)
|
||||
self.to_screen(
|
||||
'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
|
||||
(ie_result['extractor'], playlist, n_all_entries, n_entries))
|
||||
elif isinstance(ie_entries, PagedList):
|
||||
if playlistitems:
|
||||
entries = []
|
||||
for item in playlistitems:
|
||||
entries.extend(ie_entries.getslice(
|
||||
item - 1, item
|
||||
))
|
||||
else:
|
||||
entries = ie_entries.getslice(
|
||||
playliststart, playlistend)
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
else: # iterable
|
||||
if playlistitems:
|
||||
entries = make_playlistitems_entries(list(itertools.islice(
|
||||
ie_entries, 0, max(playlistitems))))
|
||||
else:
|
||||
entries = list(itertools.islice(
|
||||
ie_entries, playliststart, playlistend))
|
||||
n_entries = len(entries)
|
||||
report_download(n_entries)
|
||||
|
||||
if self.params.get('playlistreverse', False):
|
||||
entries = entries[::-1]
|
||||
|
||||
if self.params.get('playlistrandom', False):
|
||||
random.shuffle(entries)
|
||||
|
||||
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
|
||||
|
||||
for i, entry in enumerate(entries, 1):
|
||||
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
|
||||
# This __x_forwarded_for_ip thing is a bit ugly but requires
|
||||
# minimal changes
|
||||
if x_forwarded_for:
|
||||
entry['__x_forwarded_for_ip'] = x_forwarded_for
|
||||
extra = {
|
||||
'n_entries': n_entries,
|
||||
'playlist': playlist,
|
||||
'playlist_id': ie_result.get('id'),
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
'extractor_key': ie_result['extractor_key'],
|
||||
}
|
||||
|
||||
reason = self._match_entry(entry, incomplete=True)
|
||||
if reason is not None:
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
return ie_result
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __process_iterable_entry(self, entry, download, extra_info):
|
||||
return self.process_ie_result(
|
||||
@@ -1083,7 +1105,7 @@ class YoutubeDL(object):
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
@@ -1226,6 +1248,8 @@ class YoutubeDL(object):
|
||||
group = _parse_format_selection(tokens, inside_group=True)
|
||||
current_selector = FormatSelector(GROUP, group, [])
|
||||
elif string == '+':
|
||||
if inside_merge:
|
||||
raise syntax_error('Unexpected "+"', start)
|
||||
video_selector = current_selector
|
||||
audio_selector = _parse_format_selection(tokens, inside_merge=True)
|
||||
if not video_selector or not audio_selector:
|
||||
@@ -1777,6 +1801,8 @@ class YoutubeDL(object):
|
||||
os.makedirs(dn)
|
||||
return True
|
||||
except (OSError, IOError) as err:
|
||||
if isinstance(err, OSError) and err.errno == errno.EEXIST:
|
||||
return True
|
||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||
return False
|
||||
|
||||
|
@@ -172,8 +172,12 @@ class HlsFD(FragmentFD):
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
|
||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||
# not what it decrypts to.
|
||||
if not test:
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
|
@@ -2,21 +2,48 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
class ACastBaseIE(InfoExtractor):
|
||||
def _extract_episode(self, episode, show_info):
|
||||
title = episode['title']
|
||||
info = {
|
||||
'id': episode['id'],
|
||||
'display_id': episode.get('episodeUrl'),
|
||||
'url': clean_podcast_url(episode['url']),
|
||||
'title': title,
|
||||
'description': clean_html(episode.get('description') or episode.get('summary')),
|
||||
'thumbnail': episode.get('image'),
|
||||
'timestamp': parse_iso8601(episode.get('publishDate')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'filesize': int_or_none(episode.get('contentLength')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(episode.get('episode')),
|
||||
}
|
||||
info.update(show_info)
|
||||
return info
|
||||
|
||||
def _extract_show_info(self, show):
|
||||
return {
|
||||
'creator': show.get('author'),
|
||||
'series': show.get('title'),
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
return self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
|
||||
|
||||
|
||||
class ACastIE(ACastBaseIE):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -28,15 +55,15 @@ class ACastIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
||||
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766.602563,
|
||||
'duration': 2766,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
@@ -45,7 +72,7 @@ class ACastIE(InfoExtractor):
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
@@ -54,40 +81,14 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
||||
display_id)
|
||||
media_url = s['url']
|
||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
||||
episode_url = s.get('episodeUrl')
|
||||
if episode_url:
|
||||
display_id = episode_url
|
||||
else:
|
||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e.get('name') or s['title']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
'season_number': int_or_none(e.get('seasonNumber')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
}
|
||||
episode = self._call_api(
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
class ACastChannelIE(ACastBaseIE):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -102,34 +103,24 @@ class ACastChannelIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
},
|
||||
'playlist_mincount': 35,
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
|
||||
def _fetch_page(self, channel_slug, page):
|
||||
casts = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_slug = self._match_id(url)
|
||||
channel_data = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, channel_slug), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, compat_str(
|
||||
channel_data['id']), channel_data['name'], channel_data.get('description'))
|
||||
show_slug = self._match_id(url)
|
||||
show = self._call_api(show_slug, show_slug)
|
||||
show_info = self._extract_show_info(show)
|
||||
entries = []
|
||||
for episode in (show.get('episodes') or []):
|
||||
entries.append(self._extract_episode(episode, show_info))
|
||||
return self.playlist_result(
|
||||
entries, show.get('id'), show.get('title'), show.get('description'))
|
||||
|
@@ -10,6 +10,7 @@ import random
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
)
|
||||
@@ -18,11 +19,13 @@ from ..utils import (
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,16 +34,27 @@ class ADNIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||
'info_dict': {
|
||||
'id': '7778',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||
'series': 'Blue Exorcist - Kyôto Saga',
|
||||
'duration': 1467,
|
||||
'release_date': '20170106',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'season_number': 2,
|
||||
'episode': 'Début des hostilités',
|
||||
'episode_number': 1,
|
||||
}
|
||||
}
|
||||
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
@@ -54,26 +68,24 @@ class ADNIE(InfoExtractor):
|
||||
def _ass_subtitles_timecode(seconds):
|
||||
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
||||
|
||||
def _get_subtitles(self, sub_path, video_id):
|
||||
if not sub_path:
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, sub_path),
|
||||
video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
||||
if subtitle_location:
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, subtitle_location),
|
||||
video_id, 'Downloading subtitles data', fatal=False,
|
||||
headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
subtitle_location, video_id, 'Downloading subtitles data',
|
||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -119,59 +131,76 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_config = self._parse_json(self._search_regex(
|
||||
r'playerConfig\s*=\s*({.+});', webpage,
|
||||
'player config', default='{}'), video_id, fatal=False)
|
||||
if not player_config:
|
||||
config_url = urljoin(self._BASE_URL, self._search_regex(
|
||||
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
|
||||
webpage, 'config url'))
|
||||
player_config = self._download_json(
|
||||
config_url, video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
options = player['options']
|
||||
|
||||
video_info = {}
|
||||
video_info_str = self._search_regex(
|
||||
r'videoInfo\s*=\s*({.+});', webpage,
|
||||
'video info', fatal=False)
|
||||
if video_info_str:
|
||||
video_info = self._parse_json(
|
||||
video_info_str, video_id, fatal=False) or {}
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
raise ExtractorError(
|
||||
'This video is only available for paying users', expected=True)
|
||||
# self.raise_login_required() # FIXME: Login is not implemented
|
||||
|
||||
options = player_config.get('options') or {}
|
||||
metas = options.get('metas') or {}
|
||||
links = player_config.get('links') or {}
|
||||
sub_path = player_config.get('subtitles')
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
token = options['token']
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
'e': 60,
|
||||
't': token,
|
||||
}))
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
links_data = self._download_json(
|
||||
urljoin(self._BASE_URL, links_url), video_id,
|
||||
'Downloading links JSON metadata', headers={
|
||||
'Authorization': 'Bearer ' + authorization,
|
||||
})
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles') or \
|
||||
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
|
||||
sub_path += '&token=' + token
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web'
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError):
|
||||
raise e
|
||||
|
||||
if e.cause.code == 401:
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
else:
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
raise ExtractorError('Giving up retrying')
|
||||
|
||||
links = links_data.get('links') or {}
|
||||
metas = links_data.get('metadata') or {}
|
||||
sub_url = (links.get('subtitles') or {}).get('all')
|
||||
video_info = links_data.get('video') or {}
|
||||
title = metas['title']
|
||||
|
||||
formats = []
|
||||
for format_id, qualities in links.items():
|
||||
for format_id, qualities in (links.get('streaming') or {}).items():
|
||||
if not isinstance(qualities, dict):
|
||||
continue
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
@@ -189,19 +218,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
formats.extend(m3u8_formats)
|
||||
if not error:
|
||||
error = options.get('error')
|
||||
if not formats and error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
||||
'thumbnail': video_info.get('image'),
|
||||
'description': strip_or_none(metas.get('summary') or video.get('summary')),
|
||||
'thumbnail': video_info.get('image') or player.get('image'),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
||||
'series': video_info.get('playlistTitle'),
|
||||
'subtitles': self.extract_subtitles(sub_url, video_id),
|
||||
'episode': metas.get('subtitle') or video.get('name'),
|
||||
'episode_number': int_or_none(video.get('shortNumber')),
|
||||
'series': show.get('title'),
|
||||
'season_number': int_or_none(video.get('season')),
|
||||
'duration': int_or_none(video_info.get('duration') or video.get('duration')),
|
||||
'release_date': unified_strdate(video.get('releaseDate')),
|
||||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
@@ -28,6 +29,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
'biography.com': (None, 'biography'),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
@@ -54,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e, GeoRestrictedError):
|
||||
raise
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
@@ -67,6 +71,34 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
    """Build the info dict for one A+E Networks video.

    The video is looked up in the brand feed on feeds.video.aetnd.com with
    ``filter[<filter_key>]=<filter_value>`` and the first result is used.
    Its ThePlatform metadata is parsed into the base info dict, MVPD auth
    is performed when the metadata sets ``AETN$isBehindWall``, and the
    result of ``_extract_aen_smil`` plus the feed's title/series/season/
    episode fields are merged on top.

    ``domain`` must be a key of ``_DOMAIN_MAP``; ``url`` is passed through
    to ``_extract_mvpd_auth``.
    """
    requestor_id, brand = self._DOMAIN_MAP[domain]

    feed_url = 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand
    feed_query = {'filter[%s]' % filter_key: filter_value}
    feed = self._download_json(feed_url, filter_value, query=feed_query)
    result = feed['results'][0]

    title = result['title']
    video_id = result['id']
    media_url = result['publicUrl']

    # The ThePlatform path is whatever follows /s/ in the public URL,
    # up to the query string.
    theplatform_path = self._search_regex(
        r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path')
    tp_meta = self._download_theplatform_metadata(theplatform_path, video_id)
    info = self._parse_theplatform_metadata(tp_meta)

    auth = None
    if tp_meta.get('AETN$isBehindWall'):
        resource_title = tp_meta['title']
        program_id = (
            tp_meta.get('AETN$PPL_pplProgramId')
            or tp_meta.get('AETN$PPL_pplProgramId_OLD'))
        rating = tp_meta['ratings'][0]['rating']
        resource = self._get_mvpd_resource(
            requestor_id, resource_title, program_id, rating)
        auth = self._extract_mvpd_auth(url, video_id, requestor_id, resource)

    info.update(self._extract_aen_smil(media_url, video_id, auth))

    # Feed values take precedence over anything set by the steps above.
    feed_fields = {
        'title': title,
        'series': result.get('seriesName'),
        'season_number': int_or_none(result.get('tvSeasonNumber')),
        'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
    }
    info.update(feed_fields)
    return info
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
@@ -139,32 +171,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
canonical, query={'filter[canonical]': '/' + canonical})['results'][0]
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
|
||||
|
||||
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
@@ -249,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
'playlist_mincount': 168,
|
||||
'playlist_mincount': 150,
|
||||
}]
|
||||
_RESOURCE = 'series'
|
||||
_ITEMS_KEY = 'episodes'
|
||||
@@ -294,3 +301,42 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
return self.url_result(
|
||||
'http://www.history.com/videos/' + display_id,
|
||||
AENetworksIE.ie_key())
|
||||
|
||||
|
||||
class HistoryPlayerIE(AENetworksBaseIE):
    """Extractor for ``/player/<numeric id>`` URLs on history.com and biography.com."""

    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        """Resolve the player URL by looking the numeric id up with the feed's ``id`` filter."""
        mobj = re.match(self._VALID_URL, url)
        site_domain = mobj.group('domain')
        player_id = mobj.group('id')
        return self._extract_aetn_info(site_domain, 'id', player_id, url)
|
||||
|
||||
|
||||
class BiographyIE(AENetworksBaseIE):
    """Extractor for biography.com ``/video/<slug>`` pages.

    The page is only used to locate the embedded player URL, which is then
    delegated to HistoryPlayerIE.
    """

    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Find the ``<phoenix-iframe>`` player URL in the page and hand it to HistoryPlayerIE."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # Capture the whole src value that matches the player extractor's URL pattern.
        iframe_src_re = r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL
        embed_url = self._search_regex(iframe_src_re, page, 'player URL')
        return self.url_result(embed_url, HistoryPlayerIE.ie_key())
|
||||
|
@@ -1,13 +1,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class AlJazeeraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
||||
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
|
||||
'info_dict': {
|
||||
'id': '3792260579001',
|
||||
'ext': 'mp4',
|
||||
@@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
'skip': 'Not accessible from Travis CI server',
|
||||
}, {
|
||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
||||
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, program_name)
|
||||
brightcove_id = self._search_regex(
|
||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
||||
post_type, name = re.match(self._VALID_URL, url).groups()
|
||||
post_type = {
|
||||
'features': 'post',
|
||||
'program': 'episode',
|
||||
'videos': 'video',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
'https://www.aljazeera.com/graphql', name, query={
|
||||
'operationName': 'SingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': name,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
'wp-site': 'aje',
|
||||
})['data']['article']['video']
|
||||
video_id = video['id']
|
||||
account_id = video.get('accountId') or '665003303001'
|
||||
player_id = video.get('playerId') or 'BkeSH5BDb'
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||
'BrightcoveNew', video_id)
|
||||
|
@@ -80,7 +80,8 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
if properties.get('videoCategory') == 'TVE-Auth':
|
||||
video_category = properties.get('videoCategory')
|
||||
if video_category and video_category.endswith('-Auth'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
|
@@ -6,8 +6,10 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -22,8 +24,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'timestamp': 1523318400,
|
||||
'upload_date': '20180410',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
@@ -33,6 +35,27 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
|
||||
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
|
||||
'md5': '06451608c57651e985a498e69cec17e5',
|
||||
'info_dict': {
|
||||
'id': '5fbe8c61bda2010001c6763b',
|
||||
'title': 'Simple Chicken Dinner',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1610755200,
|
||||
'upload_date': '20210116',
|
||||
'release_date': '20210116',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 21,
|
||||
'episode': 'Simple Chicken Dinner',
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
@@ -60,7 +83,10 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
'release_date': unified_strdate(video.get('publishDate')),
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'series': try_get(episode, lambda x: x['show']['title']),
|
||||
'episode': episode.get('title'),
|
||||
}
|
||||
|
@@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||
webpage, 'anime description', default=None)
|
||||
|
||||
entries = []
|
||||
|
||||
def extract_info(html, video_id, num=None):
|
||||
title, description = [None] * 2
|
||||
formats = []
|
||||
@@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
self._sort_formats(info['formats'])
|
||||
f = common_info.copy()
|
||||
f.update(info)
|
||||
entries.append(f)
|
||||
yield f
|
||||
|
||||
# Extract teaser/trailer only when full episode is not available
|
||||
if not info['formats']:
|
||||
@@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'title': m.group('title'),
|
||||
'url': urljoin(url, m.group('href')),
|
||||
})
|
||||
entries.append(f)
|
||||
yield f
|
||||
|
||||
def extract_episodes(html):
|
||||
for num, episode_html in enumerate(re.findall(
|
||||
@@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
extract_entries(episode_html, video_id, common_info)
|
||||
for e in extract_entries(episode_html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
def extract_film(html, video_id):
|
||||
common_info = {
|
||||
@@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
'title': anime_title,
|
||||
'description': anime_description,
|
||||
}
|
||||
extract_entries(html, video_id, common_info)
|
||||
for e in extract_entries(html, video_id, common_info):
|
||||
yield e
|
||||
|
||||
extract_episodes(webpage)
|
||||
def entries():
|
||||
has_episodes = False
|
||||
for e in extract_episodes(webpage):
|
||||
has_episodes = True
|
||||
yield e
|
||||
|
||||
if not entries:
|
||||
extract_film(webpage, anime_id)
|
||||
if not has_episodes:
|
||||
for e in extract_film(webpage, anime_id):
|
||||
yield e
|
||||
|
||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
||||
return self.playlist_result(
|
||||
entries(), anime_id, anime_title, anime_description)
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
options = self._parse_json(self._search_regex(
|
||||
r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
|
||||
|
||||
formats = []
|
||||
for sources in player['multiSRC']:
|
||||
for sources in (options.get('multiSRC') or []):
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
@@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
info['title'] = get_element_by_id('videoTitle', webpage) or \
|
||||
self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'duration': int_or_none(options.get('duration')),
|
||||
'formats': formats,
|
||||
})
|
||||
|
61
youtube_dl/extractor/applepodcasts.py
Normal file
61
youtube_dl/extractor/applepodcasts.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ApplePodcastsIE(InfoExtractor):
    """Extractor for single episodes on podcasts.apple.com (URLs carrying ``i=<episode id>``)."""

    _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
        'md5': 'df02e6acb11c10e844946a39e7222b08',
        'info_dict': {
            'id': '1000482637777',
            'ext': 'mp3',
            'title': '207 - Whitney Webb Returns',
            'description': 'md5:13a73bade02d2e43737751e3987e1399',
            'upload_date': '20200705',
            'timestamp': 1593921600,
            'duration': 6425,
            'series': 'The Tim Dillon Show',
        }
    }, {
        'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
        'only_matching': True,
    }, {
        'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
        'only_matching': True,
    }, {
        'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the episode from the page's embedded ``shoebox-ember-data-store`` JSON."""
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)

        store_json = self._search_regex(
            r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
            page, 'ember data')
        store = self._parse_json(store_json, episode_id)
        attrs = store['data']['attributes']
        desc = attrs.get('description') or {}

        # The show name comes from the "included" records; when several
        # podcast records are present the last one wins.
        podcasts = [
            item for item in (store.get('included') or [])
            if item.get('type') == 'media/podcast']
        series = None
        if podcasts:
            series = try_get(podcasts[-1], lambda x: x['attributes']['name'])

        title = attrs['name']
        audio_url = clean_podcast_url(attrs['assetUrl'])

        return {
            'id': episode_id,
            'title': title,
            'url': audio_url,
            'description': desc.get('standard') or desc.get('short'),
            'timestamp': parse_iso8601(attrs.get('releaseDateTime')),
            'duration': int_or_none(attrs.get('durationInMilliseconds'), 1000),
            'series': series,
        }
|
174
youtube_dl/extractor/arcpublishing.py
Normal file
174
youtube_dl/extractor/arcpublishing.py
Normal file
@@ -0,0 +1,174 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
    """Extractor for Arc Publishing (POWA player) videos.

    Handles the internal ``arcpublishing:<org>:<uuid>`` URL scheme, which
    ``_extract_urls`` produces from POWA player ``<div>`` elements found in
    a web page.
    """

    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
    _TESTS = [{
        # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
        'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
        'only_matching': True,
    }, {
        # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
        'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
        'only_matching': True,
    }, {
        # https://www.actionnewsjax.com/video/live-stream/
        'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
        'only_matching': True,
    }, {
        # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
        'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
        'only_matching': True,
    }, {
        # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
        'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
        'only_matching': True,
    }, {
        # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
        'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
        'only_matching': True,
    }, {
        # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
        'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
        'only_matching': True,
    }, {
        # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
        'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
        'only_matching': True,
    }, {
        # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
        'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
        'only_matching': True,
    }, {
        # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
        'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
        'only_matching': True,
    }, {
        # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
        'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
        'only_matching': True,
    }, {
        # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
        'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
        'only_matching': True,
    }]
    # (organisations, API host template) pairs; organisations not listed
    # here fall back to the generic template used in _real_extract.
    _POWA_DEFAULTS = [
        (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
        ([
            'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
            'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
            'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
        ], 'video-api-cdn.%s.arcpublishing.com/api'),
    ]

    @staticmethod
    def _extract_urls(webpage):
        """Return ``arcpublishing:<org>:<uuid>`` URLs for POWA players in *webpage*.

        Only ``<div>`` tags whose ``class`` contains the word ``powa`` and
        that carry a ``data-uuid`` attribute are considered; tags without a
        ``data-org`` attribute are ignored.
        """
        entries = []
        # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
        for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
            powa = extract_attributes(powa_el) or {}
            org = powa.get('data-org')
            uuid = powa.get('data-uuid')
            if org and uuid:
                entries.append('arcpublishing:%s:%s' % (org, uuid))
        return entries

    def _real_extract(self, url):
        """Fetch the video record via ``findByUuid`` and build its formats.

        Each entry of the record's ``streams`` list is turned into formats
        according to its ``stream_type``: ``smil`` and ``ts``/``hls`` are
        expanded through the SMIL/M3U8 helpers, anything else becomes a
        single direct format. Duplicate stream URLs are processed once.
        """
        org, uuid = re.match(self._VALID_URL, url).groups()
        for orgs, tmpl in self._POWA_DEFAULTS:
            if org in orgs:
                base_api_tmpl = tmpl
                break
        else:
            base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
        # The template is selected with 'wapo', but the host name itself
        # is formatted with 'washpost'.
        if org == 'wapo':
            org = 'washpost'
        video = self._download_json(
            'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
            uuid, query={'uuid': uuid})[0]
        title = video['headlines']['basic']
        is_live = video.get('status') == 'live'

        urls = []
        formats = []
        # `or []` also covers an explicit null "streams" value, which
        # dict.get's default argument would not replace.
        for s in (video.get('streams') or []):
            s_url = s.get('url')
            if not s_url or s_url in urls:
                continue
            urls.append(s_url)
            stream_type = s.get('stream_type')
            if stream_type == 'smil':
                smil_formats = self._extract_smil_formats(
                    s_url, uuid, fatal=False)
                for f in smil_formats:
                    # Only formats served from the cfx/st application get
                    # their app, play path and bitrate fields rewritten.
                    if f['url'].endswith('/cfx/st'):
                        f['app'] = 'cfx/st'
                        if not f['play_path'].startswith('mp4:'):
                            f['play_path'] = 'mp4:' + f['play_path']
                        if isinstance(f['tbr'], float):
                            f['vbr'] = f['tbr'] * 1000
                            del f['tbr']
                            f['format_id'] = 'rtmp-%d' % f['vbr']
                formats.extend(smil_formats)
            elif stream_type in ('ts', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False)
                # Skip playlists in which every format has no audio
                # (this also skips an empty result).
                if all(f.get('acodec') == 'none' for f in m3u8_formats):
                    continue
                for f in m3u8_formats:
                    if f.get('acodec') == 'none':
                        f['preference'] = -40
                    elif f.get('vcodec') == 'none':
                        f['preference'] = -50
                    height = f.get('height')
                    if not height:
                        continue
                    # Take the video bitrate from the number that follows
                    # the height in the variant URL, when present.
                    vbr = self._search_regex(
                        r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
                    if vbr:
                        f['vbr'] = int(vbr)
                formats.extend(m3u8_formats)
            else:
                vbr = int_or_none(s.get('bitrate'))
                formats.append({
                    'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
                    'vbr': vbr,
                    'width': int_or_none(s.get('width')),
                    'height': int_or_none(s.get('height')),
                    'filesize': int_or_none(s.get('filesize')),
                    'url': s_url,
                    'preference': -1,
                })
        self._sort_formats(
            formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))

        subtitles = {}
        for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
            subtitle_url = subtitle.get('url')
            if subtitle_url:
                # All subtitle tracks are filed under 'en'.
                subtitles.setdefault('en', []).append({'url': subtitle_url})

        return {
            'id': uuid,
            'title': self._live_title(title) if is_live else title,
            'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
            'description': try_get(video, lambda x: x['subheadlines']['basic']),
            'formats': formats,
            # NOTE(review): the API value is scaled down by 100 here -
            # confirm the unit of `duration` against the API.
            'duration': int_or_none(video.get('duration'), 100),
            'timestamp': parse_iso8601(video.get('created_date')),
            'subtitles': subtitles,
            'is_live': is_live,
        }
|
@@ -187,13 +187,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
if doc.tag == 'rss':
|
||||
return GenericIE()._extract_rss(url, video_id, doc)
|
||||
|
||||
title = self._html_search_regex(
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
@@ -249,18 +249,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
||||
# available till 7.01.2022
|
||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||
'info_dict': {
|
||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
||||
'id': '102',
|
||||
'display_id': 'maischberger-die-woche',
|
||||
'id': '100',
|
||||
'ext': 'mp4',
|
||||
'duration': 4435.0,
|
||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
||||
'upload_date': '20180214',
|
||||
'duration': 3687.0,
|
||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||
'upload_date': '20210107',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
@@ -315,17 +315,17 @@ class ARDIE(InfoExtractor):
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': '70153354',
|
||||
'id': '78566716',
|
||||
'title': 'Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||
'timestamp': 1577047500,
|
||||
'upload_date': '20191222',
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
|
||||
'timestamp': 1596658200,
|
||||
'upload_date': '20200805',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
|
@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
_NETRC_MACHINE = 'bbc'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
_MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
|
||||
_MEDIA_SETS = [
|
||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
'iptv-all',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
_EMP_PLAYLIST_NS,
|
||||
)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
def _findall_ns(self, element, xpath):
|
||||
elements = []
|
||||
for ns in self._NAMESPACES:
|
||||
elements.extend(element.findall(xpath % ns))
|
||||
return elements
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
||||
if error is None:
|
||||
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
||||
if error is not None:
|
||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||
return self._findall_ns(media_selection, './{%s}media')
|
||||
error = media_selection.get('result')
|
||||
if error:
|
||||
raise BBCCoUkIE.MediaSelectionError(error)
|
||||
return media_selection.get('media') or []
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return self._findall_ns(media, './{%s}connection')
|
||||
return media.get('connection') or []
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, compat_etree_Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
subtitles['en'] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
]
|
||||
break
|
||||
return subtitles
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
|
||||
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
last_exception = None
|
||||
for mediaselector_url in self._MEDIASELECTOR_URLS:
|
||||
for media_set in self._MEDIA_SETS:
|
||||
try:
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||
last_exception = e
|
||||
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
media_selection = self._download_json(
|
||||
url, programme_id, 'Downloading media selection JSON',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if kind in ('video', 'audio'):
|
||||
bitrate = int_or_none(media.get('bitrate'))
|
||||
encoding = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
supplier = connection.get('supplier')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
format_id = supplier or conn_kind or protocol
|
||||
if service:
|
||||
format_id = '%s_%s' % (service, format_id)
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not service and not supplier and bitrate:
|
||||
if not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
error = self._search_regex(
|
||||
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||
r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
||||
# even not geo restricted at all
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||
# notukerror for non UK (?) users (e.g.
|
||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
|
||||
# Provides fewer formats, but works everywhere for everybody (hopefully)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
|
||||
_MEDIA_SETS = [
|
||||
'mobile-tablet-main',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
|
103
youtube_dl/extractor/bfmtv.py
Normal file
103
youtube_dl/extractor/bfmtv.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BFMTVBaseIE(InfoExtractor):
    # Shared URL patterns and Brightcove hand-off used by the bfmtv.com
    # extractors defined in this module.

    _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
    # The '%s' slot is filled in by subclasses with the first letter of the
    # two-letter page suffix; the id is the 12-digit number before '.html'.
    _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
    # Captures the opening tag of a <div ... class="video_block" ...> element.
    _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
    # Placeholders, in order: account id, player id, video id.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _brightcove_url_result(self, video_id, video_block):
        """Build a url_result that delegates ``video_id`` to BrightcoveNew.

        ``video_block`` is a mapping queried with ``.get()``; its
        'accountid' and 'playerid' entries are used when present and
        non-empty, otherwise the hard-coded fallbacks below apply.
        """
        account = video_block.get('accountid') or '876450612001'
        player = video_block.get('playerid') or 'I2qBTln4u'
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id)
        return self.url_result(player_url, 'BrightcoveNew', video_id)
|
||||
|
||||
|
||||
class BFMTVIE(BFMTVBaseIE):
    # Single-video pages: URLs whose suffix starts with 'V' (e.g. '_VN-...').
    IE_NAME = 'bfmtv'
    _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
    _TESTS = [{
        'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
        'info_dict': {
            'id': '6196747868001',
            'ext': 'mp4',
            'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
            'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
            'uploader_id': '876450610001',
            'upload_date': '20201002',
            'timestamp': 1601629620,
        },
    }]

    def _real_extract(self, url):
        """Locate the page's video block and delegate it to Brightcove.

        The first element matching ``_VIDEO_BLOCK_REGEX`` is required; its
        'videoid' attribute is mandatory (a missing one raises KeyError).
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        block_tag = self._search_regex(
            self._VIDEO_BLOCK_REGEX, html, 'video block')
        attrs = extract_attributes(block_tag)
        return self._brightcove_url_result(attrs['videoid'], attrs)
|
||||
|
||||
|
||||
class BFMTVLiveIE(BFMTVIE):
    # Live pages ('/en-direct', optionally below one section path segment).
    # No _real_extract is defined here, so extraction is inherited from
    # BFMTVIE; only the URL pattern and the tests differ.
    IE_NAME = 'bfmtv:live'
    _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
    _TESTS = [{
        'url': 'https://www.bfmtv.com/en-direct/',
        'info_dict': {
            'id': '5615950982001',
            'ext': 'mp4',
            'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'uploader_id': '876450610001',
            'upload_date': '20171018',
            'timestamp': 1508329950,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bfmtv.com/economie/en-direct/',
        'only_matching': True,
    }]
|
||||
|
||||
|
||||
class BFMTVArticleIE(BFMTVBaseIE):
    # Article pages: URLs whose suffix starts with 'A' (e.g. '_AV-', '_AD-',
    # '_AN-'). Every video block found on the page becomes a playlist entry.
    IE_NAME = 'bfmtv:article'
    _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
    _TESTS = [{
        'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
        'info_dict': {
            'id': '202101060198',
            'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
            'description': 'md5:947974089c303d3ac6196670ae262843',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
        'only_matching': True,
    }, {
        'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of every Brightcove video embedded in the article.

        Video blocks without a 'videoid' attribute are ignored. The playlist
        id is the article id from the URL; title and description come from
        the page's meta tags.
        """
        article_id = self._match_id(url)
        html = self._download_webpage(url, article_id)

        entries = []
        for block_tag in re.findall(self._VIDEO_BLOCK_REGEX, html):
            attrs = extract_attributes(block_tag)
            brightcove_id = attrs.get('videoid')
            if brightcove_id:
                entries.append(
                    self._brightcove_url_result(brightcove_id, attrs))

        playlist_title = self._og_search_title(html, fatal=False)
        playlist_description = self._html_search_meta(
            ['og:description', 'description'], html)
        return self.playlist_result(
            entries, article_id, playlist_title, playlist_description)
|
30
youtube_dl/extractor/bibeltv.py
Normal file
30
youtube_dl/extractor/bibeltv.py
Normal file
@@ -0,0 +1,30 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BibelTVIE(InfoExtractor):
    # Thin wrapper: the numeric id from the URL is handed to Brightcove as a
    # 'ref:' reference id.
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
        'md5': '252f908192d611de038b8504b08bf97f',
        'info_dict': {
            'id': 'ref:329703',
            'ext': 'mp4',
            'title': 'Sprachkurs in Malaiisch',
            'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
            'timestamp': 1608316701,
            'uploader_id': '5840105145001',
            'upload_date': '20201218',
        }
    }, {
        'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the video identified by the URL's numeric id to BrightcoveNew."""
        reference_id = self._match_id(url)
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % reference_id
        return self.url_result(player_url, 'BrightcoveNew')
|
60
youtube_dl/extractor/bongacams.py
Normal file
60
youtube_dl/extractor/bongacams.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
    # Live-only extractor: room data is fetched from the site's amf.php
    # endpoint and the HLS playlist URL is derived from it.
    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://de.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://cn.bongacams.com/azumi-8',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the live stream of the channel in ``url``.

        The room data request is made against the same host as the page URL.
        'localData.videoServerUrl' is required in the response; performer
        details are optional and fall back to the channel id / None.
        """
        host, channel_id = re.match(self._VALID_URL, url).group('host', 'id')

        room_request = urlencode_postdata((
            ('method', 'getRoomData'),
            ('args[]', channel_id),
            ('args[]', 'false'),
        ))
        amf = self._download_json(
            'https://%s/tools/amf.php' % host, channel_id,
            data=room_request,
            headers={'X-Requested-With': 'XMLHttpRequest'})

        server_url = amf['localData']['videoServerUrl']

        # Performer fields are best-effort; only string values are accepted
        # for the two name fields.
        username = try_get(
            amf, lambda x: x['performerData']['username'], compat_str)
        uploader_id = username or channel_id
        uploader = try_get(
            amf, lambda x: x['performerData']['displayName'], compat_str)
        lovers = try_get(amf, lambda x: x['performerData']['loversCount'])
        like_count = int_or_none(lovers)

        playlist_url = '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id)
        formats = self._extract_m3u8_formats(
            playlist_url, channel_id, 'mp4', m3u8_id='hls', live=True)
        self._sort_formats(formats)

        return {
            'id': channel_id,
            'title': self._live_title(uploader or uploader_id),
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': 18,
            'is_live': True,
            'formats': formats,
        }
|
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
@@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
num_drm_sources = 0
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
sources = json_data.get('sources') or []
|
||||
for source in sources:
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
if container == 'WVM' or source.get('key_systems'):
|
||||
num_drm_sources += 1
|
||||
continue
|
||||
elif ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if not formats:
|
||||
errors = json_data.get('errors')
|
||||
if errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if sources and num_drm_sources == len(sources):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -600,24 +601,27 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
config, lambda x: x['video_cloud']['policy_key'])
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
webpage = self._download_webpage(
|
||||
base_url + 'index.min.js', video_id)
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
@@ -7,12 +7,12 @@ from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
@@ -37,6 +37,7 @@ class CanvasIE(InfoExtractor):
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8',
|
||||
@@ -47,29 +48,34 @@ class CanvasIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
data = None
|
||||
if site_id != 'vrtvideo':
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
|
||||
# New API endpoint
|
||||
if not data:
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({'Content-Type': 'application/json'})
|
||||
token = self._download_json(
|
||||
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||
'Downloading token', data=b'',
|
||||
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
|
||||
'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
|
||||
data = self._download_json(
|
||||
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||
video_id, 'Downloading video JSON', fatal=False, query={
|
||||
video_id, 'Downloading video JSON', query={
|
||||
'vrtPlayerToken': token,
|
||||
'client': '%s@PROD' % site_id,
|
||||
}, expected_status=400)
|
||||
message = data.get('message')
|
||||
if message and not data.get('title'):
|
||||
if data.get('code') == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
if not data.get('title'):
|
||||
code = data.get('code')
|
||||
if code == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required()
|
||||
elif code == 'INVALID_LOCATION':
|
||||
self.raise_geo_restricted(countries=['BE'])
|
||||
raise ExtractorError(data.get('message') or code, expected=True)
|
||||
|
||||
title = data['title']
|
||||
description = data.get('description')
|
||||
@@ -205,20 +211,24 @@ class CanvasEenIE(InfoExtractor):
|
||||
|
||||
class VrtNUIE(GigyaBaseIE):
|
||||
IE_DESC = 'VrtNU.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# Available via old API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'ext': 'mp4',
|
||||
'title': 'De zwarte weduwe',
|
||||
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
|
||||
'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
|
||||
'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
|
||||
'duration': 1457.04,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Postbus X',
|
||||
'season': 'Seizoen 1989',
|
||||
'season_number': 1989,
|
||||
'episode': 'De zwarte weduwe',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1595822400,
|
||||
'upload_date': '20200727',
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
@@ -300,69 +310,25 @@ class VrtNUIE(GigyaBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<nui-media[^>]+>)', webpage, 'media element'))
|
||||
video_id = attrs['videoid']
|
||||
publication_id = attrs.get('publicationid')
|
||||
if publication_id:
|
||||
video_id = publication_id + '$' + video_id
|
||||
|
||||
page = (self._parse_json(self._search_regex(
|
||||
r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
|
||||
default='{}'), video_id, fatal=False) or {}).get('page') or {}
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
season = self._html_search_regex(
|
||||
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
|
||||
<span>seizoen\ (.+?)</span>\s*
|
||||
</div>''',
|
||||
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
|
||||
webpage, 'season', default=None)
|
||||
|
||||
season_number = int_or_none(season)
|
||||
|
||||
episode_number = int_or_none(self._html_search_regex(
|
||||
r'''(?xms)<div\ class="content__episode">\s*
|
||||
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
|
||||
</div>''',
|
||||
webpage, 'episode_number', default=None))
|
||||
|
||||
release_date = parse_iso8601(self._html_search_regex(
|
||||
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
|
||||
webpage, 'release_date', default=None))
|
||||
|
||||
# If there's a ? or a # in the URL, remove them and everything after
|
||||
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
|
||||
securevideo_url = clean_url + '.mssecurevideo.json'
|
||||
|
||||
try:
|
||||
video = self._download_json(securevideo_url, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
self.raise_login_required()
|
||||
raise
|
||||
|
||||
# We are dealing with a '../<show>.relevant' URL
|
||||
redirect_url = video.get('url')
|
||||
if redirect_url:
|
||||
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
|
||||
|
||||
# There is only one entry, but with an unknown key, so just get
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
'season_number': int_or_none(page.get('episode_season')),
|
||||
})
|
||||
|
@@ -11,7 +11,47 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mcp_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||
|
||||
|
||||
class CBSLocalArticleIE(AnvatoIE):
|
||||
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -8,11 +8,14 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
@@ -116,8 +119,30 @@ class CSpanIE(InfoExtractor):
|
||||
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||
base_url=url)
|
||||
add_referer(info['formats'])
|
||||
for subtitles in info['subtitles'].values():
|
||||
for subtitle in subtitles:
|
||||
ext = determine_ext(subtitle['url'])
|
||||
if ext == 'php':
|
||||
ext = 'vtt'
|
||||
subtitle['ext'] = ext
|
||||
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||
return merge_dicts(info, ld_info)
|
||||
title = get_element_by_class('video-page-title', webpage) or \
|
||||
self._og_search_title(webpage)
|
||||
description = get_element_by_attribute('itemprop', 'description', webpage) or \
|
||||
self._html_search_meta(['og:description', 'description'], webpage)
|
||||
return merge_dicts(info, ld_info, {
|
||||
'title': title,
|
||||
'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
|
||||
'description': description,
|
||||
'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
|
||||
'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
|
||||
'duration': int_or_none(self._search_regex(
|
||||
r'jwsetup\.seclength\s*=\s*(\d+);',
|
||||
webpage, 'duration', fatal=False)),
|
||||
'view_count': str_to_int(self._search_regex(
|
||||
r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
|
||||
webpage, 'views', fatal=False)),
|
||||
})
|
||||
|
||||
# Obsolete
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
|
52
youtube_dl/extractor/ctv.py
Normal file
52
youtube_dl/extractor/ctv.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
    # Resolves a ctv.ca show/movie path through the site's GraphQL endpoint
    # and hands the resulting id over to the NineCNineMedia extractor.
    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
        'info_dict': {
            'id': '2102249',
            'ext': 'flv',
            'title': 'Wednesday, December 23, 2020',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
            'timestamp': 1608732000,
            'upload_date': '20201223',
            'series': 'Your Morning',
            'season': '2020-2021',
            'season_number': 5,
            'episode_number': 88,
            'tags': ['Your Morning'],
            'categories': ['Talk Show'],
            'duration': 7467.126,
        },
    }, {
        'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the Axis id for the page path and delegate to NineCNineMedia.

        The matched path (e.g. 'shows/<show>/<episode>') doubles as the
        display id and as the GraphQL ``resolvedPath`` argument. A response
        lacking any of the indexed keys raises KeyError/TypeError.
        """
        display_id = self._match_id(url)

        # NOTE(review): the indentation inside this literal is reconstructed;
        # whitespace is presumed insignificant to the GraphQL endpoint.
        graphql_query = '''{
  resolvedPath(path: "/%s") {
    lastSegment {
      content {
        ... on AxisContent {
          axisId
          videoPlayerDestCode
        }
      }
    }
  }
}''' % display_id

        response = self._download_json(
            'https://www.ctv.ca/space-graphql/graphql', display_id,
            query={'query': graphql_query})
        content = response['data']['resolvedPath']['lastSegment']['content']

        video_id = content['axisId']
        destination = content['videoPlayerDestCode']
        return self.url_result(
            '9c9media:%s:%s' % (destination, video_id),
            'NineCNineMedia', video_id)
|
@@ -17,7 +17,12 @@ from ..utils import (
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
|
||||
(?:www\.)?(?P<host>d
|
||||
(?:
|
||||
play\.(?P<country>dk|fi|jp|se|no)|
|
||||
iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
|
||||
)
|
||||
)|
|
||||
(?P<subdomain_country>es|it)\.dplay\.com
|
||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||
|
||||
@@ -126,6 +131,24 @@ class DPlayIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.dplay.jp/video/gold-rush/24086',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||
@@ -241,7 +264,7 @@ class DPlayIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain').lstrip('www.')
|
||||
country = mobj.group('country') or mobj.group('subdomain_country')
|
||||
host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
|
||||
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
|
||||
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
@@ -33,6 +33,8 @@ from .aenetworks import (
|
||||
AENetworksCollectionIE,
|
||||
AENetworksShowIE,
|
||||
HistoryTopicIE,
|
||||
HistoryPlayerIE,
|
||||
BiographyIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
@@ -53,7 +55,9 @@ from .appletrailers import (
|
||||
AppleTrailersIE,
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .applepodcasts import ApplePodcastsIE
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
@@ -97,6 +101,12 @@ from .bellmedia import BellMediaIE
|
||||
from .beatport import BeatportIE
|
||||
from .bet import BetIE
|
||||
from .bfi import BFIPlayerIE
|
||||
from .bfmtv import (
|
||||
BFMTVIE,
|
||||
BFMTVLiveIE,
|
||||
BFMTVArticleIE,
|
||||
)
|
||||
from .bibeltv import BibelTVIE
|
||||
from .bigflix import BigflixIE
|
||||
from .bild import BildIE
|
||||
from .bilibili import (
|
||||
@@ -119,6 +129,7 @@ from .bleacherreport import (
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .bpb import BpbIE
|
||||
@@ -163,7 +174,10 @@ from .cbc import (
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
@@ -241,6 +255,7 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
@@ -394,7 +409,6 @@ from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamespot import GameSpotIE
|
||||
@@ -415,7 +429,10 @@ from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
from .googleplus import GooglePlusIE
|
||||
from .googlepodcasts import (
|
||||
GooglePodcastsIE,
|
||||
GooglePodcastsFeedIE,
|
||||
)
|
||||
from .googlesearch import GoogleSearchIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
@@ -456,6 +473,10 @@ from .ign import (
|
||||
OneUPIE,
|
||||
PCMagIE,
|
||||
)
|
||||
from .iheart import (
|
||||
IHeartRadioIE,
|
||||
IHeartRadioPodcastIE,
|
||||
)
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
ImdbListIE
|
||||
@@ -505,7 +526,10 @@ from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
)
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
@@ -627,6 +651,11 @@ from .microsoftvirtualacademy import (
|
||||
MicrosoftVirtualAcademyIE,
|
||||
MicrosoftVirtualAcademyCourseIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsIE,
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
)
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
@@ -686,7 +715,6 @@ from .nba import (
|
||||
NBAChannelIE,
|
||||
)
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
@@ -784,6 +812,7 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKRadioPodkastIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
@@ -1047,6 +1076,7 @@ from .skynewsarabia import (
|
||||
from .sky import (
|
||||
SkyNewsIE,
|
||||
SkySportsIE,
|
||||
SkySportsNewsIE,
|
||||
)
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
@@ -1084,10 +1114,17 @@ from .spike import (
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .stitcher import (
|
||||
StitcherIE,
|
||||
StitcherShowIE,
|
||||
)
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .spotify import (
|
||||
SpotifyIE,
|
||||
SpotifyShowIE,
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
@@ -1420,7 +1457,10 @@ from .vshare import VShareIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
VVVVIDShowIE,
|
||||
)
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
@@ -1451,7 +1491,10 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wistia import WistiaIE
|
||||
from .wistia import (
|
||||
WistiaIE,
|
||||
WistiaPlaylistIE,
|
||||
)
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -8,6 +9,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/
|
||||
groups/[^/]+/permalink/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
@@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||
'info_dict': {
|
||||
'id': '211641140192478',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
'graphURI': '/api/graphql/'
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
replay_data = extract_relay_data(_filter)
|
||||
for require in (replay_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||
@@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
graphql_data = self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
|
||||
webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
|
||||
for require in (graphql_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
entries = []
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
|
||||
data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
@@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
post_data = {
|
||||
'doc_id': 3731964053542869,
|
||||
'variables': json.dumps({
|
||||
'livingRoomID': video_id,
|
||||
}),
|
||||
}
|
||||
|
||||
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||
if prefetched_data:
|
||||
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||
if lsd:
|
||||
post_data[lsd['name']] = lsd['value']
|
||||
|
||||
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||
for define in (relay_data.get('define') or []):
|
||||
if define[0] == 'RelayAPIConfigDefaults':
|
||||
self._api_config = define[2]
|
||||
|
||||
living_room = self._download_json(
|
||||
urljoin(url, self._api_config['graphURI']), video_id,
|
||||
data=urlencode_postdata(post_data))['data']['living_room']
|
||||
|
||||
entries = []
|
||||
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||
v_id = video.get('id')
|
||||
if not v_id:
|
||||
continue
|
||||
v_id = compat_str(v_id)
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||
self.ie_key(), v_id, video.get('name')))
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
# Video info not in first request, do a secondary request using
|
||||
# tahoe player specific URL
|
||||
tahoe_data = self._download_webpage(
|
||||
|
@@ -1,77 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
    """Extractor for fxnetworks.com / simpsonsworld.com video pages."""

    _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.fxnetworks.com/video/1032565827847',
        'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
        'info_dict': {
            'id': 'dRzwHC_MMqIv',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
            'age_limit': 14,
            'uploader': 'NEWA-FNG-FX',
            'upload_date': '20170825',
            'timestamp': 1503686274,
            'episode_number': 0,
            'season_number': 2,
            'series': 'Better Things',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.simpsonsworld.com/video/716094019682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the ThePlatform release.

        Metadata is read from the attributes of the page's ``<a>`` element
        whose ``rel`` is a link.theplatform.com URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        if 'The content you are trying to access is not available in your region.' in webpage:
            self.raise_geo_restricted()

        anchor_html = self._search_regex(
            r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')
        video_data = extract_attributes(anchor_html)
        player_type = self._search_regex(
            r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)

        release_url = video_data['rel']
        title = video_data['data-title']
        rating = video_data.get('data-rating')

        # Query parameters appended to the release URL.
        query = {'mbr': 'true'}
        if player_type == 'movies':
            query['manifest'] = 'm3u'
        else:
            query['switch'] = 'http'

        # Pages flagged with data-req-auth need an MVPD auth token.
        if video_data.get('data-req-auth') == '1':
            resource = self._get_mvpd_resource(
                video_data['data-channel'], title,
                video_data.get('data-guid'), rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)

        smuggled_url = smuggle_url(
            update_url_query(release_url, query), {'force_smil_url': True})
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': smuggled_url,
            'series': video_data.get('data-show-title'),
            'episode_number': int_or_none(video_data.get('data-episode')),
            'season_number': int_or_none(video_data.get('data-season')),
            'thumbnail': video_data.get('data-large-thumb'),
            'age_limit': parse_age_limit(rating),
            'ie_key': 'ThePlatform',
        }
|
@@ -67,7 +67,10 @@ from .tube8 import Tube8IE
|
||||
from .mofosex import MofosexEmbedIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .youporn import YouPornIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
@@ -124,6 +127,7 @@ from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2024,22 +2028,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
|
||||
'info_dict': {
|
||||
'id': 'uPDB5I9wfp8',
|
||||
'ext': 'webm',
|
||||
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
|
||||
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
|
||||
'upload_date': '20160219',
|
||||
'uploader': 'Pocoyo - Português (BR)',
|
||||
'uploader_id': 'PocoyoBrazil',
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
|
||||
'info_dict': {
|
||||
@@ -2209,7 +2197,32 @@ class GenericIE(InfoExtractor):
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
{
|
||||
# VHX Embed
|
||||
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
|
||||
'info_dict': {
|
||||
'id': '858208',
|
||||
'ext': 'mp4',
|
||||
'title': 'Untitled',
|
||||
'uploader_id': 'user80538407',
|
||||
'uploader': 'OTT Videos',
|
||||
},
|
||||
},
|
||||
{
|
||||
# ArcPublishing PoWa video player
|
||||
'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
|
||||
'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
|
||||
'info_dict': {
|
||||
'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Senate candidates wave to voters on Anchorage streets',
|
||||
'description': 'md5:91f51a6511f090617353dc720318b20e',
|
||||
'timestamp': 1604378735,
|
||||
'upload_date': '20201103',
|
||||
'duration': 1581,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2576,6 +2589,10 @@ class GenericIE(InfoExtractor):
|
||||
if tp_urls:
|
||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||
|
||||
arc_urls = ArcPublishingIE._extract_urls(webpage)
|
||||
if arc_urls:
|
||||
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2587,6 +2604,10 @@ class GenericIE(InfoExtractor):
|
||||
if vimeo_urls:
|
||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||
|
||||
vhx_url = VHXEmbedIE._extract_url(webpage)
|
||||
if vhx_url:
|
||||
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
|
@@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'resource_id': 'Disney',
|
||||
}
|
||||
},
|
||||
'fxnow.fxnetworks': {
|
||||
'brand': '025',
|
||||
'requestor_id': 'dtci',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?P<sub_domain>%s)\.)?go|
|
||||
(?P<sub_domain_2>abc|freeform|disneynow)
|
||||
(?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
@@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||
'info_dict': {
|
||||
'id': 'VDKA12782841',
|
||||
'ext': 'mp4',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
|
@@ -1,73 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import codecs
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class GooglePlusIE(InfoExtractor):
    """Extractor for videos attached to plus.google.com posts."""

    IE_DESC = 'Google Plus'
    _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
    IE_NAME = 'plus.google'
    _TEST = {
        'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
        'info_dict': {
            'id': 'ZButuJc6CtH',
            'ext': 'flv',
            'title': '嘆きの天使 降臨',
            'upload_date': '20120613',
            'uploader': '井上ヨシマサ',
        }
    }

    def _real_extract(self, url):
        """Extract title, uploader, upload date and formats for a post.

        Two pages are fetched: the post itself (for metadata and the link
        to the photo/video page) and that linked page (for the media URLs).
        """
        video_id = self._match_id(url)

        # First request: the post page carries the metadata.
        webpage = self._download_webpage(url, video_id, 'Downloading entry webpage')

        # The title is the first line of the og:description.
        title = self._og_search_description(webpage).splitlines()[0]
        raw_date = self._html_search_regex(
            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
            webpage, 'upload date', fatal=False, flags=re.VERBOSE)
        upload_date = unified_strdate(raw_date)
        uploader = self._html_search_regex(
            r'rel="author".*?>(.*?)</a>', webpage, 'uploader', fatal=False)

        # Second request: follow the photos/ link, which may be relative.
        DOMAIN = 'https://plus.google.com/'
        video_page = self._search_regex(
            r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
            webpage, 'video page URL')
        if not video_page.startswith(DOMAIN):
            video_page = DOMAIN + video_page

        webpage = self._download_webpage(video_page, video_id, 'Downloading video page')

        decode_escape = codecs.getdecoder('unicode_escape')

        def unicode_escape(s):
            # Decode only the \uXXXX sequences, leaving other text untouched.
            return re.sub(
                r'\\u[0-9a-fA-F]{4,}',
                lambda m: decode_escape(m.group(0))[0],
                s)

        # One format per (width, height, url) triple found on the video page.
        formats = []
        for width, height, video_url in re.findall(
                r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage):
            formats.append({
                'url': unicode_escape(video_url),
                'ext': 'flv',
                'width': int(width),
                'height': int(height),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
|
88
youtube_dl/extractor/googlepodcasts.py
Normal file
88
youtube_dl/extractor/googlepodcasts.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class GooglePodcastsBaseIE(InfoExtractor):
    """Shared helpers for the Google Podcasts extractors."""

    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'

    def _batch_execute(self, func_id, video_id, params):
        """POST one call to the batchexecute endpoint and decode its payload.

        ``func_id`` and the JSON-encoded ``params`` are wrapped into the
        ``f.req`` form field. The element at ``[0][2]`` of the response is
        itself a JSON string; it is decoded and returned.
        """
        def isolate_json(text):
            # Keep only the span from the first '[' to the last ']'.
            return self._search_regex(r'(?s)(\[.+\])', text, 'data')

        request = [[[func_id, json.dumps(params), None, '1']]]
        envelope = self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id,
            data=urlencode_postdata({'f.req': json.dumps(request)}),
            transform_source=isolate_json)
        return json.loads(envelope[0][2])

    def _extract_episode(self, episode):
        """Build an info dict from the positional ``episode`` list.

        Only index 14 (creator) is looked up tolerantly; every other index
        is accessed directly.
        """
        return {
            'id': episode[4][3],
            'title': episode[8],
            'url': clean_podcast_url(episode[13]),
            'thumbnail': episode[2],
            'description': episode[9],
            'creator': try_get(episode, lambda x: x[14]),
            'timestamp': int_or_none(episode[11]),
            'duration': int_or_none(episode[12]),
            'series': episode[1],
        }
|
||||
|
||||
|
||||
class GooglePodcastsIE(GooglePodcastsBaseIE):
    """Extractor for a single Google Podcasts episode URL."""

    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        }
    }

    def _real_extract(self, url):
        """Fetch the episode named by the feed and episode URL segments."""
        mobj = re.match(self._VALID_URL, url)
        b64_feed_url = mobj.group('feed_url')
        b64_guid = mobj.group('id')
        # The episode record is the second element of the decoded payload.
        payload = self._batch_execute(
            'oNjqVe', b64_guid, [b64_feed_url, b64_guid])
        return self._extract_episode(payload[1])
|
||||
|
||||
|
||||
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    """Extractor for a whole Google Podcasts feed, returned as a playlist."""

    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per episode listed for the feed."""
        b64_feed_url = self._match_id(url)
        data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])

        # A missing episode list or feed record yields an empty playlist /
        # missing title rather than an error.
        episodes = try_get(data, lambda x: x[1][0]) or []
        entries = [self._extract_episode(item) for item in episodes]

        feed = try_get(data, lambda x: x[3]) or []
        title = try_get(feed, lambda x: x[0])
        description = try_get(feed, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=title, playlist_description=description)
|
97
youtube_dl/extractor/iheart.py
Normal file
97
youtube_dl/extractor/iheart.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class IHeartRadioBaseIE(InfoExtractor):
    """Shared helpers for the iHeartRadio podcast extractors."""

    def _call_api(self, path, video_id, fatal=True, query=None):
        """Download and return JSON from the podcast API under ``path``."""
        api_url = 'https://api.iheart.com/api/v3/podcast/' + path
        return self._download_json(
            api_url, video_id, fatal=fatal, query=query)

    def _extract_episode(self, episode):
        """Return the metadata fields common to every episode dict.

        ``id``, ``title`` and ``url`` are not set here; callers add them.
        """
        thumbnail = episode.get('imageUrl')
        description = clean_html(episode.get('description'))
        # NOTE(review): startDate is scaled by 1000, presumably ms -> s; confirm.
        timestamp = int_or_none(episode.get('startDate'), 1000)
        duration = int_or_none(episode.get('duration'))
        return {
            'thumbnail': thumbnail,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
        }
|
||||
|
||||
|
||||
class IHeartRadioIE(IHeartRadioBaseIE):
    # Single-episode extractor. Accepts both public episode URLs and the
    # internal "iheartradio:<id>" scheme emitted by IHeartRadioPodcastIE.
    #
    # NOTE: this attribute was previously misspelled `IENAME`, which only
    # defined an unrelated class attribute and left the inherited IE_NAME in
    # effect. The sibling podcast extractor uses 'iheartradio:podcast'.
    IE_NAME = 'iheartradio'
    _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
        'md5': 'c8609c92c8688dcb69d8541042b8abca',
        'info_dict': {
            'id': '70346499',
            'ext': 'mp3',
            'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
            'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
            'timestamp': 1597741200,
            'upload_date': '20200818',
        }
    }

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        # The API wraps the episode record in an 'episode' key.
        episode = self._call_api(
            'episodes/' + episode_id, episode_id)['episode']

        # Start from the shared metadata and add the mandatory fields; a
        # missing title or mediaUrl is a hard failure (KeyError).
        info = self._extract_episode(episode)
        info.update({
            'id': episode_id,
            'title': episode['title'],
            'url': clean_podcast_url(episode['mediaUrl']),
        })
        return info
|
||||
|
||||
|
||||
class IHeartRadioPodcastIE(IHeartRadioBaseIE):
    # Playlist extractor for a podcast page: every episode becomes a
    # deferred 'url' entry that is resolved by IHeartRadioIE.
    IE_NAME = 'iheartradio:podcast'
    _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
        'info_dict': {
            'id': '30717896',
            'title': 'It Could Happen Here',
            'description': 'md5:5842117412a967eb0b01f8088eb663e2',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        podcast_id = self._match_id(url)
        podcast_path = 'podcasts/' + podcast_id

        # A very large limit is sent so that the episode list comes back in
        # a single request.
        episode_list = self._call_api(
            podcast_path + '/episodes', podcast_id,
            query={'limit': 1000000000})['data']

        entries = []
        for item in episode_list:
            item_id = str_or_none(item.get('id'))
            if item_id:
                # Carry the metadata already present in the listing and
                # defer the media lookup to the single-episode extractor.
                entry = self._extract_episode(item)
                entry.update({
                    '_type': 'url',
                    'id': item_id,
                    'title': item.get('title'),
                    'url': 'iheartradio:' + item_id,
                    'ie_key': IHeartRadioIE.ie_key(),
                })
                entries.append(entry)

        # Podcast-level metadata is optional: the request is non-fatal and
        # falls back to an empty dict.
        podcast = self._call_api(podcast_path, podcast_id, False) or {}
        title = podcast.get('title')
        description = podcast.get('description')

        return self.playlist_result(entries, podcast_id, title, description)
|
@@ -22,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'uploader': 'B E A U T Y F O R A S H E S',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
(media, video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
width) = [None] * 12
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -137,59 +140,77 @@ class InstagramIE(InfoExtractor):
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||
if not media:
|
||||
additional_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'additional data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if additional_data:
|
||||
media = try_get(
|
||||
additional_data, lambda x: x['graphql']['shortcode_media'],
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
||||
def get_count(key, kind):
|
||||
return int_or_none(try_get(
|
||||
def get_count(keys, kind):
|
||||
if not isinstance(keys, (list, tuple)):
|
||||
keys = [keys]
|
||||
for key in keys:
|
||||
count = int_or_none(try_get(
|
||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count('to_comment', 'comment')
|
||||
if count is not None:
|
||||
return count
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count(
|
||||
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
@@ -2,92 +2,71 @@ from __future__ import unicode_literals
|
||||
|
||||
from .canvas import CanvasIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class KetnetIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
|
||||
'md5': '6bdeb65998930251bbd1c510750edba9',
|
||||
'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
|
||||
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
|
||||
'info_dict': {
|
||||
'id': 'zomerse-filmpjes',
|
||||
'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
|
||||
'description': 'Gluur mee met Ghost Rockers op de filmset',
|
||||
'title': 'Nachtwacht - Reeks 3: Aflevering 1',
|
||||
'description': 'De Nachtwacht krijgt te maken met een parasiet',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
# mzid in playerConfig instead of sources
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
'duration': 1468.02,
|
||||
'timestamp': 1609225200,
|
||||
'upload_date': '20201229',
|
||||
'series': 'Nachtwacht',
|
||||
'season': 'Reeks 3',
|
||||
'episode': 'De Greystook',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# mzsource, geo restricted to Belgium
|
||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
|
||||
'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video = self._download_json(
|
||||
'https://senior-bff.ketnet.be/graphql', display_id, query={
|
||||
'query': '''{
|
||||
video(id: "content/ketnet/nl/%s.model.json") {
|
||||
description
|
||||
episodeNr
|
||||
imageUrl
|
||||
mediaReference
|
||||
programTitle
|
||||
publicationDate
|
||||
seasonTitle
|
||||
subtitleVideodetail
|
||||
titleVideodetail
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['video']
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
|
||||
'player config'),
|
||||
video_id)
|
||||
|
||||
mzid = config.get('mzid')
|
||||
if mzid:
|
||||
return self.url_result(
|
||||
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
|
||||
CanvasIE.ie_key(), video_id=mzid)
|
||||
|
||||
title = config['title']
|
||||
|
||||
formats = []
|
||||
for source_key in ('', 'mz'):
|
||||
source = config.get('%ssource' % source_key)
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
for format_id, format_url in source.items():
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif format_id == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
mz_id = compat_urllib_parse_unquote(video['mediaReference'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': config.get('description'),
|
||||
'thumbnail': config.get('image'),
|
||||
'series': config.get('program'),
|
||||
'episode': config.get('episode'),
|
||||
'formats': formats,
|
||||
'_type': 'url_transparent',
|
||||
'id': mz_id,
|
||||
'title': video['titleVideodetail'],
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
|
||||
'thumbnail': video.get('imageUrl'),
|
||||
'description': video.get('description'),
|
||||
'timestamp': parse_iso8601(video.get('publicationDate')),
|
||||
'series': video.get('programTitle'),
|
||||
'season': video.get('seasonTitle'),
|
||||
'episode': video.get('subtitleVideodetail'),
|
||||
'episode_number': int_or_none(video.get('episodeNr')),
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
}
|
||||
|
@@ -1,82 +1,107 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class KhanAcademyIE(InfoExtractor):
|
||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
||||
IE_NAME = 'KhanAcademy'
|
||||
class KhanAcademyBaseIE(InfoExtractor):
|
||||
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.khanacademy.org/video/one-time-pad',
|
||||
'md5': '7b391cce85e758fb94f763ddc1bbb979',
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video['youtubeId'],
|
||||
'id': video.get('slug'),
|
||||
'title': video.get('title'),
|
||||
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'description': video.get('description'),
|
||||
'ie_key': 'Youtube',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
component_props = self._parse_json(self._download_json(
|
||||
'https://www.khanacademy.org/api/internal/graphql',
|
||||
display_id, query={
|
||||
'hash': 1604303425,
|
||||
'variables': json.dumps({
|
||||
'path': display_id,
|
||||
'queryParams': '',
|
||||
}),
|
||||
})['data']['contentJson'], display_id)['componentProps']
|
||||
return self._parse_component_props(component_props)
|
||||
|
||||
|
||||
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy'
|
||||
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||
'info_dict': {
|
||||
'id': 'one-time-pad',
|
||||
'ext': 'webm',
|
||||
'id': 'FlIG3TvQCBQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'The one-time pad',
|
||||
'description': 'The perfect cipher',
|
||||
'duration': 176,
|
||||
'uploader': 'Brit Cruise',
|
||||
'uploader_id': 'khanacademy',
|
||||
'upload_date': '20120411',
|
||||
'timestamp': 1334170113,
|
||||
'license': 'cc-by-nc-sa',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
||||
}
|
||||
|
||||
def _parse_component_props(self, component_props):
|
||||
video = component_props['tutorialPageData']['contentModel']
|
||||
info = self._parse_video(video)
|
||||
author_names = video.get('authorNames')
|
||||
info.update({
|
||||
'uploader': ', '.join(author_names) if author_names else None,
|
||||
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||
'license': video.get('kaUserLicense'),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||
IE_NAME = 'khanacademy:unit'
|
||||
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||
_TEST = {
|
||||
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||
'info_dict': {
|
||||
'id': 'cryptography',
|
||||
'title': 'Journey into cryptography',
|
||||
'title': 'Cryptography',
|
||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}]
|
||||
'playlist_mincount': 31,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
m = re.match(self._VALID_URL, url)
|
||||
video_id = m.group('id')
|
||||
def _parse_component_props(self, component_props):
|
||||
curation = component_props['curation']
|
||||
|
||||
if m.group('key') == 'video':
|
||||
data = self._download_json(
|
||||
'http://api.khanacademy.org/api/v1/videos/' + video_id,
|
||||
video_id, 'Downloading video info')
|
||||
|
||||
upload_date = unified_strdate(data['date_added'])
|
||||
uploader = ', '.join(data['author_names'])
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': data['url'],
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': data['image_url'],
|
||||
'duration': data['duration'],
|
||||
'description': data['description'],
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
entries = []
|
||||
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||
chapter_info = {
|
||||
'chapter': tutorial.get('title'),
|
||||
'chapter_number': tutorial_number,
|
||||
'chapter_id': tutorial.get('id'),
|
||||
}
|
||||
else:
|
||||
# topic
|
||||
data = self._download_json(
|
||||
'http://api.khanacademy.org/api/v1/topic/' + video_id,
|
||||
video_id, 'Downloading topic info')
|
||||
for content_item in (tutorial.get('contentItems') or []):
|
||||
if content_item.get('kind') == 'Video':
|
||||
info = self._parse_video(content_item)
|
||||
info.update(chapter_info)
|
||||
entries.append(info)
|
||||
|
||||
entries = [
|
||||
{
|
||||
'_type': 'url',
|
||||
'url': c['url'],
|
||||
'id': c['id'],
|
||||
'title': c['title'],
|
||||
}
|
||||
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': video_id,
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'entries': entries,
|
||||
}
|
||||
return self.playlist_result(
|
||||
entries, curation.get('unit'), curation.get('title'),
|
||||
curation.get('description'))
|
||||
|
196
youtube_dl/extractor/minds.py
Normal file
196
youtube_dl/extractor/minds.py
Normal file
@@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MindsBaseIE(InfoExtractor):
    # Common URL prefix and API helper for all Minds extractors.
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        # `resource` is only used to label the download note.
        api_url = 'https://www.minds.com/api/' + path

        # Echo the XSRF-TOKEN cookie (when one is set for this URL) back in
        # the X-XSRF-TOKEN header; an empty value is sent otherwise.
        xsrf_cookie = self._get_cookies(api_url).get('XSRF-TOKEN')
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_cookie.value if xsrf_cookie else '',
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            api_url, video_id, note, headers=request_headers, query=query)
|
||||
|
||||
|
||||
class MindsIE(MindsBaseIE):
    # Single-video extractor for media, newsfeed and archive URLs.
    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Imported locally because this module's import block does not
        # bring ExtractorError into scope.
        from ..utils import ExtractorError

        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                # The activity wraps a video stored under another guid.
                video_id = entity['entity_guid']
            else:
                # Non-video activity: hand its permalink to whichever
                # extractor matches it.
                return self.url_result(entity['perma_url'])
        else:
            # Validate remote data explicitly instead of with `assert`,
            # which is stripped under `python -O` and otherwise surfaces as
            # a bare AssertionError.
            subtype = entity.get('subtype')
            if subtype != 'video':
                raise ExtractorError(
                    'Unsupported entity subtype: %s' % subtype, expected=True)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)

        # Prefer the entity embedded in the video response; fall back to
        # the one fetched above.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')

        # A single tag may arrive as a plain string; normalise to a list.
        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            tags = [tags]

        # Request the poster URL and use the final URL reported by the
        # response as the thumbnail; failures are non-fatal.
        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()

        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
|
||||
|
||||
|
||||
class MindsFeedBaseIE(MindsBaseIE):
    # Base for playlist extractors over a container's video feed.
    # Subclasses must define _FEED_PATH and _FEED_TYPE.
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        # Lazily yields one url_result per feed entity, following the
        # 'load-next' cursor from page to page.
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        page_num = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % page_num, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # Use .get() so that a response without a cursor ends the
            # pagination instead of raising KeyError in the middle of a
            # lazily consumed playlist.
            next_timestamp = data.get('load-next')
            # Stop when there is no cursor or the page is not full.
            if not next_timestamp or len(entities) != self._PAGE_SIZE:
                break
            query['from_timestamp'] = next_timestamp
            page_num += 1

    def _real_extract(self, url):
        feed_id = self._match_id(url)
        # The feed record is wrapped in a key named after the feed type.
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        # Entries are fetched by the feed's guid, while the playlist keeps
        # the id taken from the URL.
        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
|
||||
|
||||
|
||||
class MindsChannelIE(MindsFeedBaseIE):
    # Feed configuration consumed by MindsFeedBaseIE._real_extract.
    _FEED_TYPE = 'channel'
    _FEED_PATH = 'channel'
    IE_NAME = 'minds:%s' % _FEED_TYPE
    # Any top-level path segment counts as a channel name, except the
    # prefixes excluded by the negative lookahead.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
|
||||
|
||||
|
||||
class MindsGroupIE(MindsFeedBaseIE):
    # Feed configuration consumed by MindsFeedBaseIE._real_extract.
    _FEED_TYPE = 'group'
    _FEED_PATH = 'groups/group'
    IE_NAME = 'minds:%s' % _FEED_TYPE
    # Groups are addressed by a numeric id under groups/profile/.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }
|
@@ -1,15 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .telecinco import TelecincoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
class MiTeleIE(TelecincoIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
@@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
@@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor):
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video = pre_player['video']
|
||||
video_id = video['dataMediaId']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
info = content.get('info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
# for some reason only HLS is supported
|
||||
'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
|
||||
'id': video_id,
|
||||
video_info.update({
|
||||
'title': title,
|
||||
'description': info.get('synopsis'),
|
||||
'series': content.get('title'),
|
||||
@@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'episode': content.get('subtitle'),
|
||||
'episode_number': int_or_none(info.get('episode_number')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'thumbnail': video.get('dataPoster'),
|
||||
'age_limit': int_or_none(info.get('rating')),
|
||||
'timestamp': parse_iso8601(pre_player.get('publishedTime')),
|
||||
}
|
||||
})
|
||||
return video_info
|
||||
|
@@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
|
||||
cloudcast_url = cloudcast.get('url')
|
||||
if not cloudcast_url:
|
||||
continue
|
||||
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
||||
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
||||
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
|
||||
entries.append(self.url_result(
|
||||
cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
|
||||
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
||||
|
||||
page_info = items['pageInfo']
|
||||
has_next_page = page_info['hasNextPage']
|
||||
@@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||
_DESCRIPTION_KEY = 'biog'
|
||||
_ROOT_TYPE = 'user'
|
||||
_NODE_TEMPLATE = '''slug
|
||||
url'''
|
||||
url
|
||||
owner { username }'''
|
||||
|
||||
def _get_playlist_title(self, title, slug):
|
||||
return '%s (%s)' % (title, slug)
|
||||
@@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||
_NODE_TEMPLATE = '''cloudcast {
|
||||
slug
|
||||
url
|
||||
owner { username }
|
||||
}'''
|
||||
|
||||
def _get_cloudcast(self, node):
|
||||
|
@@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):
|
||||
# no keywords
|
||||
'url': 'http://motherless.com/8B4BBC1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# see https://motherless.com/videos/recent for recent videos with
|
||||
# uploaded date in "ago" format
|
||||
'url': 'https://motherless.com/3C3E2CF',
|
||||
'info_dict': {
|
||||
'id': '3C3E2CF',
|
||||
'ext': 'mp4',
|
||||
'title': 'a/ Hot Teens',
|
||||
'categories': list,
|
||||
'upload_date': '20210104',
|
||||
'uploader_id': 'yonbiw',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -85,20 +102,28 @@ class MotherlessIE(InfoExtractor):
|
||||
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||
(r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||
(r'>([\d,.]+)\s+Favorites<',
|
||||
r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||
webpage, 'like count', fatal=False))
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
||||
if 'Ago' in upload_date:
|
||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
||||
else:
|
||||
upload_date = unified_strdate(upload_date)
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
|
||||
'upload date', default=None))
|
||||
if not upload_date:
|
||||
uploaded_ago = self._search_regex(
|
||||
r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
|
||||
default=None)
|
||||
if uploaded_ago:
|
||||
delta = int(uploaded_ago[:-1])
|
||||
_AGO_UNITS = {
|
||||
'h': 'hours',
|
||||
'd': 'days',
|
||||
}
|
||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
|
||||
comment_count = webpage.count('class="media-comment-contents"')
|
||||
uploader_id = self._html_search_regex(
|
||||
|
@@ -158,7 +158,8 @@ class NBCIE(AdobePassIE):
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||
@@ -174,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
iframe_m = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||
r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
|
||||
if iframe_m:
|
||||
return iframe_m.group('url')
|
||||
|
||||
@@ -192,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
# Does not include https because its certificate is invalid
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# iframe src
|
||||
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||
'info_dict': {
|
||||
'id': 'PHJSaFWbrTY9',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'upload_date': '20150330',
|
||||
'timestamp': 1427726529,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# data-mpx-src
|
||||
'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data-src
|
||||
'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -274,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE):
|
||||
}
|
||||
|
||||
|
||||
class CSNNEIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
|
||||
'info_dict': {
|
||||
'id': 'yvBLLUgQ8WU0',
|
||||
'ext': 'mp4',
|
||||
'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
|
||||
'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
|
||||
'timestamp': 1459369979,
|
||||
'upload_date': '20160330',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': self._html_search_meta('twitter:player:stream', webpage),
|
||||
'display_id': display_id,
|
||||
}
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||
|
||||
|
@@ -90,7 +90,7 @@ class NhkVodIE(NhkBaseIE):
|
||||
_TESTS = [{
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||
'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
|
||||
'info_dict': {
|
||||
'id': 'a95j5iza',
|
||||
'ext': 'mp4',
|
||||
|
@@ -5,10 +5,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'$include': '[HasClosedCaptions]',
|
||||
})
|
||||
|
||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
|
||||
if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
manifest_base_url = content_package_url + 'manifest.'
|
||||
@@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for image in content.get('Images', []):
|
||||
for image in (content.get('Images') or []):
|
||||
image_url = image.get('Url')
|
||||
if not image_url:
|
||||
continue
|
||||
@@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
continue
|
||||
container.append(e_name)
|
||||
|
||||
season = content.get('Season', {})
|
||||
season = content.get('Season') or {}
|
||||
|
||||
info = {
|
||||
'id': content_id,
|
||||
@@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'episode_number': int_or_none(content.get('Episode')),
|
||||
'season': season.get('Name'),
|
||||
'season_number': season.get('Number'),
|
||||
'season_number': int_or_none(season.get('Number')),
|
||||
'season_id': season.get('Id'),
|
||||
'series': content.get('Media', {}).get('Name'),
|
||||
'series': try_get(content, lambda x: x['Media']['Name']),
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'duration': float_or_none(content_package.get('Duration')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
if content_package.get('HasClosedCaptions'):
|
||||
|
@@ -6,16 +6,13 @@ import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
url_or_none,
|
||||
@@ -63,7 +60,8 @@ class NRKBaseIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
urljoin('http://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query)
|
||||
fatal=fatal, query=query,
|
||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
@@ -116,9 +114,39 @@ class NRKIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# podcast
|
||||
'url': 'nrk:l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:podcast/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# clip
|
||||
'url': 'nrk:150533',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:clip/150533',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# program
|
||||
'url': 'nrk:MDDP12000117',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:program/ENRK10100318',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# direkte
|
||||
'url': 'nrk:nrk1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:channel/nrk1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_from_playback(self, video_id):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).split('/')[-1]
|
||||
|
||||
path_templ = 'playback/%s/' + video_id
|
||||
|
||||
def call_playback_api(item, query=None):
|
||||
@@ -126,6 +154,8 @@ class NRKIE(NRKBaseIE):
|
||||
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
||||
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
|
||||
|
||||
video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id
|
||||
|
||||
if manifest.get('playability') == 'nonPlayable':
|
||||
self._raise_error(manifest['nonPlayable'])
|
||||
|
||||
@@ -140,8 +170,15 @@ class NRKIE(NRKBaseIE):
|
||||
format_url = url_or_none(asset.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
|
||||
asset_format = (asset.get('format') or '').lower()
|
||||
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
||||
elif asset_format == 'mp3':
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': asset_format,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = call_playback_api('metadata')
|
||||
@@ -168,59 +205,133 @@ class NRKIE(NRKBaseIE):
|
||||
'height': int_or_none(image.get('pixelHeight')),
|
||||
})
|
||||
|
||||
return {
|
||||
subtitles = {}
|
||||
for sub in try_get(playable, lambda x: x['subtitles'], list) or []:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = url_or_none(sub.get('webVtt'))
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_key = str_or_none(sub.get('language')) or 'nb'
|
||||
sub_type = str_or_none(sub.get('type'))
|
||||
if sub_type:
|
||||
sub_key += '-%s' % sub_type
|
||||
subtitles.setdefault(sub_key, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
legal_age = try_get(
|
||||
data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
|
||||
# https://en.wikipedia.org/wiki/Norwegian_Media_Authority
|
||||
age_limit = None
|
||||
if legal_age:
|
||||
if legal_age == 'A':
|
||||
age_limit = 0
|
||||
elif legal_age.isdigit():
|
||||
age_limit = int_or_none(legal_age)
|
||||
|
||||
is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_playback(video_id)
|
||||
if is_series:
|
||||
series = season_id = season_number = episode = episode_number = None
|
||||
programs = self._call_api(
|
||||
'programs/%s' % video_id, video_id, 'programs', fatal=False)
|
||||
if programs and isinstance(programs, dict):
|
||||
series = str_or_none(programs.get('seriesTitle'))
|
||||
season_id = str_or_none(programs.get('seasonId'))
|
||||
season_number = int_or_none(programs.get('seasonNumber'))
|
||||
episode = str_or_none(programs.get('episodeTitle'))
|
||||
episode_number = int_or_none(programs.get('episodeNumber'))
|
||||
if not series:
|
||||
series = title
|
||||
if alt_title:
|
||||
title += ' - %s' % alt_title
|
||||
if not season_number:
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'Sesong\s+(\d+)', description or '', 'season number',
|
||||
default=None))
|
||||
if not episode:
|
||||
episode = alt_title if is_series else None
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'^(\d+)\.', episode or '', 'episode number',
|
||||
default=None))
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'\((\d+)\s*:\s*\d+\)', description or '',
|
||||
'episode number', default=None))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
class NRKTVIE(InfoExtractor):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
|
||||
'info_dict': {
|
||||
'id': 'MDDP12000117AA',
|
||||
'id': 'MDDP12000117',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alarm Trolltunga',
|
||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
'duration': 2223.44,
|
||||
'age_limit': 6,
|
||||
'subtitles': {
|
||||
'nb-nor': [{
|
||||
'ext': 'vtt',
|
||||
}],
|
||||
'nb-ttv': [{
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'id': 'MUHH48000314',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'title': '20 spørsmål - 23. mai 2014',
|
||||
'alt_title': '23. mai 2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
'episode': '23. mai 2014',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'id': 'MDFP15000514',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'title': 'Kunnskapskanalen - Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605.08,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -229,10 +340,11 @@ class NRKTVIE(NRKBaseIE):
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'id': 'MSPO40010515',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -242,24 +354,27 @@ class NRKTVIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'id': 'MSPO40010515',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'skip': 'Ikke tilgjengelig utenfor Norge',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'id': 'KMTE50001317',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'title': 'Anno - 13. episode',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'episode': '13. episode',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -267,13 +382,14 @@ class NRKTVIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'id': 'MUHH46000317',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -290,180 +406,26 @@ class NRKTVIE(NRKBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_api_host = None
|
||||
|
||||
def _extract_from_mediaelement(self, video_id):
|
||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||
|
||||
for api_host in api_hosts:
|
||||
data = self._download_json(
|
||||
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||
video_id, 'Downloading mediaelement JSON',
|
||||
fatal=api_host == api_hosts[-1])
|
||||
if not data:
|
||||
continue
|
||||
self._api_host = api_host
|
||||
break
|
||||
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
urls = []
|
||||
entries = []
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
live = (data.get('mediaElementType') == 'Live'
|
||||
or data.get('isLive') is True or conviva.get('isLive'))
|
||||
|
||||
def make_title(t):
|
||||
return self._live_title(t) if live else t
|
||||
|
||||
media_assets = data.get('mediaAssets')
|
||||
if media_assets and isinstance(media_assets, list):
|
||||
def video_id_and_title(idx):
|
||||
return ((video_id, title) if len(media_assets) == 1
|
||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||
for num, asset in enumerate(media_assets, 1):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url or asset_url in urls:
|
||||
continue
|
||||
urls.append(asset_url)
|
||||
formats = self._extract_nrk_formats(asset_url, video_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
entry_id, entry_title = video_id_and_title(num)
|
||||
duration = parse_duration(asset.get('duration'))
|
||||
subtitles = {}
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({
|
||||
'url': compat_urllib_parse_unquote(subtitle_url)
|
||||
})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': make_title(entry_title),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'is_live': live,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
media_url = data.get('mediaUrl')
|
||||
if media_url and media_url not in urls:
|
||||
formats = self._extract_nrk_formats(media_url, video_id)
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': make_title(title),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'is_live': live,
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
self._raise_error(data)
|
||||
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
web_images = images.get('webImages')
|
||||
if isinstance(web_images, list):
|
||||
thumbnails = [{
|
||||
'url': image['imageUrl'],
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
|
||||
|
||||
for entry in entries:
|
||||
entry.update(common_info)
|
||||
for f in entry['formats']:
|
||||
f['vcodec'] = vcodec
|
||||
|
||||
points = data.get('shortIndexPoints')
|
||||
if isinstance(points, list):
|
||||
chapters = []
|
||||
for next_num, point in enumerate(points, start=1):
|
||||
if not isinstance(point, dict):
|
||||
continue
|
||||
start_time = parse_duration(point.get('startPoint'))
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = parse_duration(
|
||||
data.get('duration')
|
||||
if next_num == len(points)
|
||||
else points[next_num].get('startPoint'))
|
||||
if end_time is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': point.get('title'),
|
||||
})
|
||||
if chapters and len(entries) == 1:
|
||||
entries[0]['chapters'] = chapters
|
||||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_mediaelement(video_id)
|
||||
return self.url_result(
|
||||
'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
|
||||
'info_dict': {
|
||||
'id': 'MUHH36005220BA',
|
||||
'id': 'MUHH36005220',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kro, krig og kjærlighet 2:6',
|
||||
'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
|
||||
'duration': 1563,
|
||||
'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
|
||||
'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
|
||||
'duration': 1563.92,
|
||||
'series': 'Hellums kro',
|
||||
'season_number': 1,
|
||||
'episode_number': 2,
|
||||
'episode': '2:6',
|
||||
'episode': '2. Kro, krig og kjærlighet',
|
||||
'age_limit': 6,
|
||||
},
|
||||
'params': {
|
||||
@@ -472,15 +434,16 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
|
||||
'info_dict': {
|
||||
'id': 'MSUI14000816AA',
|
||||
'id': 'MSUI14000816',
|
||||
'ext': 'mp4',
|
||||
'title': 'Backstage 8:30',
|
||||
'title': 'Backstage - 8. episode',
|
||||
'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
|
||||
'duration': 1320,
|
||||
'series': 'Backstage',
|
||||
'season_number': 1,
|
||||
'episode_number': 8,
|
||||
'episode': '8:30',
|
||||
'episode': '8. episode',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -489,7 +452,7 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id, season_number, episode_number = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
@@ -501,10 +464,12 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'_type': 'url',
|
||||
'id': nrk_id,
|
||||
'url': 'nrk:%s' % nrk_id,
|
||||
'ie_key': NRKIE.ie_key(),
|
||||
'season_number': int(season_number),
|
||||
'episode_number': int(episode_number),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -518,8 +483,6 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
return entries
|
||||
@@ -531,6 +494,10 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
if embedded.get(asset_key):
|
||||
return asset_key
|
||||
|
||||
@staticmethod
|
||||
def _catalog_name(serie_kind):
|
||||
return 'podcast' if serie_kind in ('podcast', 'podkast') else 'series'
|
||||
|
||||
def _entries(self, data, display_id):
|
||||
for page_num in itertools.count(1):
|
||||
embedded = data.get('_embedded') or data
|
||||
@@ -564,7 +531,16 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
|
||||
|
||||
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<domain>tv|radio)\.nrk\.no/
|
||||
(?P<serie_kind>serie|pod[ck]ast)/
|
||||
(?P<serie>[^/]+)/
|
||||
(?:
|
||||
(?:sesong/)?(?P<id>\d+)|
|
||||
sesong/(?P<id_2>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
||||
'info_dict': {
|
||||
@@ -600,19 +576,34 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
# 180 entries, single page
|
||||
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/diagnose-kverulant',
|
||||
'info_dict': {
|
||||
'id': 'hele_historien/diagnose-kverulant',
|
||||
'title': 'Diagnose kverulant',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/loerdagsraadet/sesong/202101',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url)
|
||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, serie, season_id = re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain = mobj.group('domain')
|
||||
serie_kind = mobj.group('serie_kind')
|
||||
serie = mobj.group('serie')
|
||||
season_id = mobj.group('id') or mobj.group('id_2')
|
||||
display_id = '%s/%s' % (serie, season_id)
|
||||
|
||||
data = self._call_api(
|
||||
'%s/catalog/series/%s/seasons/%s' % (domain, serie, season_id),
|
||||
'%s/catalog/%s/%s/seasons/%s'
|
||||
% (domain, self._catalog_name(serie_kind), serie, season_id),
|
||||
display_id, 'season', query={'pageSize': 50})
|
||||
|
||||
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
|
||||
@@ -622,7 +613,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
|
||||
|
||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/serie/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/(?P<serie_kind>serie|pod[ck]ast)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# new layout, instalments
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
@@ -682,23 +673,33 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
}, {
|
||||
'url': 'https://nrksuper.no/serie/labyrint',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
|
||||
'info_dict': {
|
||||
'id': 'ulrikkes_univers',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/nrkno-poddkast-26588-134079-05042018030000',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
False if any(ie.suitable(url)
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE))
|
||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, series_id = re.match(self._VALID_URL, url).groups()
|
||||
site, serie_kind, series_id = re.match(self._VALID_URL, url).groups()
|
||||
is_radio = site == 'radio.nrk'
|
||||
domain = 'radio' if is_radio else 'tv'
|
||||
|
||||
size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
|
||||
series = self._call_api(
|
||||
'%s/catalog/series/%s' % (domain, series_id),
|
||||
'%s/catalog/%s/%s'
|
||||
% (domain, self._catalog_name(serie_kind), series_id),
|
||||
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
||||
titles = try_get(series, [
|
||||
lambda x: x['titles'],
|
||||
@@ -713,12 +714,14 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
embedded_seasons = embedded.get('seasons') or []
|
||||
if len(linked_seasons) > len(embedded_seasons):
|
||||
for season in linked_seasons:
|
||||
season_name = season.get('name')
|
||||
if season_name and isinstance(season_name, compat_str):
|
||||
season_url = urljoin(url, season.get('href'))
|
||||
if not season_url:
|
||||
season_name = season.get('name')
|
||||
if season_name and isinstance(season_name, compat_str):
|
||||
season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name)
|
||||
if season_url:
|
||||
entries.append(self.url_result(
|
||||
'https://%s.nrk.no/serie/%s/sesong/%s'
|
||||
% (domain, series_id, season_name),
|
||||
ie=NRKTVSeasonIE.ie_key(),
|
||||
season_url, ie=NRKTVSeasonIE.ie_key(),
|
||||
video_title=season.get('title')))
|
||||
else:
|
||||
for season in embedded_seasons:
|
||||
@@ -743,6 +746,38 @@ class NRKTVDirekteIE(NRKTVIE):
|
||||
}]
|
||||
|
||||
|
||||
class NRKRadioPodkastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/sesong/1/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/bortfoert-i-bergen/l_774d1a2c-7aa7-4965-8d1a-2c7aa7d9652c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class NRKPlaylistBaseIE(InfoExtractor):
|
||||
def _extract_description(self, webpage):
|
||||
pass
|
||||
|
@@ -450,6 +450,18 @@ class PeerTubeIE(InfoExtractor):
|
||||
'tags': ['framasoft', 'peertube'],
|
||||
'categories': ['Science & Technology'],
|
||||
}
|
||||
}, {
|
||||
# Issue #26002
|
||||
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||
'info_dict': {
|
||||
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dot matrix printer shell demo',
|
||||
'uploader_id': '3',
|
||||
'timestamp': 1587401293,
|
||||
'upload_date': '20200420',
|
||||
'uploader': 'Drew DeVault',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||
'only_matching': True,
|
||||
@@ -526,7 +538,15 @@ class PeerTubeIE(InfoExtractor):
|
||||
title = video['name']
|
||||
|
||||
formats = []
|
||||
for file_ in video['files']:
|
||||
files = video.get('files') or []
|
||||
for playlist in (video.get('streamingPlaylists') or []):
|
||||
if not isinstance(playlist, dict):
|
||||
continue
|
||||
playlist_files = playlist.get('files')
|
||||
if not (playlist_files and isinstance(playlist_files, list)):
|
||||
continue
|
||||
files.extend(playlist_files)
|
||||
for file_ in files:
|
||||
if not isinstance(file_, dict):
|
||||
continue
|
||||
file_url = url_or_none(file_.get('fileUrl'))
|
||||
|
@@ -6,16 +6,33 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
(?:
|
||||
player\.
|
||||
(?:
|
||||
olympusattelecom|
|
||||
vibebyvista
|
||||
)|
|
||||
(?:api|player)\.multicastmedia|
|
||||
(?:api-ovp|player)\.piksel
|
||||
)\.com|
|
||||
(?:
|
||||
mz-edge\.stream\.co|
|
||||
movie-s\.nhk\.or
|
||||
)\.jp|
|
||||
vidego\.baltimorecity\.gov
|
||||
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/ums2867l',
|
||||
@@ -56,46 +73,41 @@ class PikselIE(InfoExtractor):
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _call_api(self, app_token, resource, display_id, query, fatal=True):
|
||||
response = (self._download_json(
|
||||
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
|
||||
display_id, query=query, fatal=fatal) or {}).get('response')
|
||||
failure = try_get(response, lambda x: x['failure']['reason'])
|
||||
if failure:
|
||||
if fatal:
|
||||
raise ExtractorError(failure, expected=True)
|
||||
self.report_warning(failure)
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
ref_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
|
||||
webpage, 'program uuid', default=display_id)
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
'v': video_id
|
||||
})['response']
|
||||
failure = response.get('failure')
|
||||
if failure:
|
||||
raise ExtractorError(response['failure']['reason'], expected=True)
|
||||
video_data = response['WsProgramResponse']['program']['asset']
|
||||
query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
|
||||
program = self._call_api(
|
||||
app_token, 'program', display_id, query)['WsProgramResponse']['program']
|
||||
video_id = program['uuid']
|
||||
video_data = program['asset']
|
||||
title = video_data['title']
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
for asset_file in video_data.get('assetFiles', []):
|
||||
def process_asset_file(asset_file):
|
||||
if not asset_file:
|
||||
return
|
||||
# TODO: extract rtmp formats
|
||||
http_url = asset_file.get('http_url')
|
||||
if not http_url:
|
||||
continue
|
||||
return
|
||||
tbr = None
|
||||
vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
|
||||
abr = int_or_none(asset_file.get('audioBitrate'), 1024)
|
||||
@@ -118,6 +130,43 @@ class PikselIE(InfoExtractor):
|
||||
'filesize': int_or_none(asset_file.get('filesize')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
def process_asset_files(asset_files):
|
||||
for asset_file in (asset_files or []):
|
||||
process_asset_file(asset_file)
|
||||
|
||||
process_asset_files(video_data.get('assetFiles'))
|
||||
process_asset_file(video_data.get('referenceFile'))
|
||||
if not formats:
|
||||
asset_id = video_data.get('assetid') or program.get('assetid')
|
||||
if asset_id:
|
||||
process_asset_files(try_get(self._call_api(
|
||||
app_token, 'asset_file', display_id, {
|
||||
'assetid': asset_id,
|
||||
}, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
|
||||
if smil_url:
|
||||
transform_source = None
|
||||
if ref_id == 'nhkworld':
|
||||
# TODO: figure out if this is something to be fixed in urljoin,
|
||||
# _parse_smil_formats or keep it here
|
||||
transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
|
||||
formats.extend(self._extract_smil_formats(
|
||||
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
|
||||
transform_source=transform_source, fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@@ -288,14 +288,24 @@ class PornHubIE(PornHubBaseIE):
|
||||
video_urls.append((v_url, None))
|
||||
video_urls_set.add(v_url)
|
||||
|
||||
def parse_quality_items(quality_items):
|
||||
q_items = self._parse_json(quality_items, video_id, fatal=False)
|
||||
if not isinstance(q_items, list):
|
||||
return
|
||||
for item in q_items:
|
||||
if isinstance(item, dict):
|
||||
add_video_url(item.get('url'))
|
||||
|
||||
if not video_urls:
|
||||
FORMAT_PREFIXES = ('media', 'quality')
|
||||
FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
|
||||
js_vars = extract_js_vars(
|
||||
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
|
||||
default=None)
|
||||
if js_vars:
|
||||
for key, format_url in js_vars.items():
|
||||
if any(key.startswith(p) for p in FORMAT_PREFIXES):
|
||||
if key.startswith(FORMAT_PREFIXES[-1]):
|
||||
parse_quality_items(format_url)
|
||||
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
|
||||
add_video_url(format_url)
|
||||
if not video_urls and re.search(
|
||||
r'<[^>]+\bid=["\']lockedPlayer', webpage):
|
||||
@@ -351,12 +361,16 @@ class PornHubIE(PornHubBaseIE):
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', default=None)
|
||||
|
||||
def extract_vote_count(kind, name):
|
||||
return self._extract_count(
|
||||
(r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
|
||||
r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
|
||||
webpage, name)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
|
||||
like_count = extract_vote_count('Up', 'like')
|
||||
dislike_count = extract_vote_count('Down', 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
|
@@ -103,22 +103,28 @@ class RaiBaseIE(InfoExtractor):
|
||||
}.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
def _extract_subtitles(url, subtitle_url):
|
||||
def _extract_subtitles(url, video_data):
|
||||
STL_EXT = 'stl'
|
||||
SRT_EXT = 'srt'
|
||||
subtitles = {}
|
||||
if subtitle_url and isinstance(subtitle_url, compat_str):
|
||||
subtitle_url = urljoin(url, subtitle_url)
|
||||
STL_EXT = '.stl'
|
||||
SRT_EXT = '.srt'
|
||||
subtitles['it'] = [{
|
||||
'ext': 'stl',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
if subtitle_url.endswith(STL_EXT):
|
||||
srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT
|
||||
subtitles['it'].append({
|
||||
'ext': 'srt',
|
||||
'url': srt_url,
|
||||
subtitles_array = video_data.get('subtitlesArray') or []
|
||||
for k in ('subtitles', 'subtitlesUrl'):
|
||||
subtitles_array.append({'url': video_data.get(k)})
|
||||
for subtitle in subtitles_array:
|
||||
sub_url = subtitle.get('url')
|
||||
if sub_url and isinstance(sub_url, compat_str):
|
||||
sub_lang = subtitle.get('language') or 'it'
|
||||
sub_url = urljoin(url, sub_url)
|
||||
sub_ext = determine_ext(sub_url, SRT_EXT)
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': sub_ext,
|
||||
'url': sub_url,
|
||||
})
|
||||
if STL_EXT == sub_ext:
|
||||
subtitles[sub_lang].append({
|
||||
'ext': SRT_EXT,
|
||||
'url': sub_url[:-len(STL_EXT)] + SRT_EXT,
|
||||
})
|
||||
return subtitles
|
||||
|
||||
|
||||
@@ -138,6 +144,9 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'duration': 6160,
|
||||
'series': 'Report',
|
||||
'season': '2013/14',
|
||||
'subtitles': {
|
||||
'it': 'count:2',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -145,6 +154,10 @@ class RaiPlayIE(RaiBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# subtitles at 'subtitlesArray' key (see #27698)
|
||||
'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -172,7 +185,7 @@ class RaiPlayIE(RaiBaseIE):
|
||||
if date_published and time_published:
|
||||
date_published += ' ' + time_published
|
||||
|
||||
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
||||
subtitles = self._extract_subtitles(url, video)
|
||||
|
||||
program_info = media.get('program_info') or {}
|
||||
season = media.get('season')
|
||||
@@ -326,6 +339,22 @@ class RaiIE(RaiBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# ContentItem in iframe (see #12652) and subtitle at 'subtitlesUrl' key
|
||||
'url': 'http://www.presadiretta.rai.it/dl/portali/site/puntata/ContentItem-3ed19d13-26c2-46ff-a551-b10828262f1b.html',
|
||||
'info_dict': {
|
||||
'id': '1ad6dc64-444a-42a4-9bea-e5419ad2f5fd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partiti acchiappavoti - Presa diretta del 13/09/2015',
|
||||
'description': 'md5:d291b03407ec505f95f27970c0b025f4',
|
||||
'upload_date': '20150913',
|
||||
'subtitles': {
|
||||
'it': 'count:2',
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Direct MMS URL
|
||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
|
||||
@@ -366,7 +395,7 @@ class RaiIE(RaiBaseIE):
|
||||
'url': compat_urlparse.urljoin(url, thumbnail_url),
|
||||
})
|
||||
|
||||
subtitles = self._extract_subtitles(url, media.get('subtitlesUrl'))
|
||||
subtitles = self._extract_subtitles(url, media)
|
||||
|
||||
info = {
|
||||
'id': content_id,
|
||||
@@ -403,7 +432,8 @@ class RaiIE(RaiBaseIE):
|
||||
r'''(?x)
|
||||
(?:
|
||||
(?:initEdizione|drawMediaRaiTV)\(|
|
||||
<(?:[^>]+\bdata-id|var\s+uniquename)=
|
||||
<(?:[^>]+\bdata-id|var\s+uniquename)=|
|
||||
<iframe[^>]+\bsrc=
|
||||
)
|
||||
(["\'])
|
||||
(?:(?!\1).)*\bContentItem-(?P<id>%s)
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -56,7 +57,8 @@ class RedditRIE(InfoExtractor):
|
||||
'id': 'zv89llsvexdz',
|
||||
'ext': 'mp4',
|
||||
'title': 'That small heart attack.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:4',
|
||||
'timestamp': 1501941939,
|
||||
'upload_date': '20170805',
|
||||
'uploader': 'Antw87',
|
||||
@@ -118,11 +120,34 @@ class RedditRIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
thumbnails = []
|
||||
|
||||
def add_thumbnail(src):
|
||||
if not isinstance(src, dict):
|
||||
return
|
||||
thumbnail_url = url_or_none(src.get('url'))
|
||||
if not thumbnail_url:
|
||||
return
|
||||
thumbnails.append({
|
||||
'url': unescapeHTML(thumbnail_url),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
})
|
||||
|
||||
for image in try_get(data, lambda x: x['preview']['images']) or []:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
add_thumbnail(image.get('source'))
|
||||
resolutions = image.get('resolutions')
|
||||
if isinstance(resolutions, list):
|
||||
for resolution in resolutions:
|
||||
add_thumbnail(resolution)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'title': data.get('title'),
|
||||
'thumbnail': url_or_none(data.get('thumbnail')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': float_or_none(data.get('created_utc')),
|
||||
'uploader': data.get('author'),
|
||||
'duration': int_or_none(try_get(
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
class SBSIE(InfoExtractor):
|
||||
IE_DESC = 'sbs.com.au'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=)|news/(?:embeds/)?video/)(?P<id>[0-9]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
@@ -18,7 +18,7 @@ class SBSIE(InfoExtractor):
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
|
||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||
'info_dict': {
|
||||
'id': '320403011771',
|
||||
'id': '_rFBPRPO4pMR',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dingo Conservation (The Feed)',
|
||||
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
||||
@@ -34,6 +34,15 @@ class SBSIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/news/embeds/video/1840778819866',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -4,8 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -41,16 +45,22 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
def _real_extract(self, url):
|
||||
path, episode_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
try:
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), episode_id)[0]['error_code'], expected=True)
|
||||
raise
|
||||
|
||||
for source in media.get('sources', {}):
|
||||
src = source.get('src')
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
@@ -11,36 +13,59 @@ from ..utils import (
|
||||
|
||||
|
||||
class SkyBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
|
||||
webpage, 'video data'))
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
_SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'
|
||||
|
||||
video_url = 'ooyala:%s' % video_data['data-video-id']
|
||||
if video_data.get('data-token-required') == 'true':
|
||||
token_fetch_options = self._parse_json(video_data.get(
|
||||
'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
|
||||
token_fetch_url = token_fetch_options.get('url')
|
||||
if token_fetch_url:
|
||||
embed_token = self._download_webpage(urljoin(
|
||||
url, token_fetch_url), video_id, fatal=False)
|
||||
if embed_token:
|
||||
video_url = smuggle_url(
|
||||
video_url, {'embed_token': embed_token.strip('"')})
|
||||
def _process_ooyala_element(self, webpage, sdc_el, url):
|
||||
sdc = extract_attributes(sdc_el)
|
||||
provider = sdc.get('data-provider')
|
||||
if provider == 'ooyala':
|
||||
video_id = sdc['data-sdc-video-id']
|
||||
video_url = 'ooyala:%s' % video_id
|
||||
ie_key = 'Ooyala'
|
||||
ooyala_el = self._search_regex(
|
||||
r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id,
|
||||
webpage, 'video data', fatal=False)
|
||||
if ooyala_el:
|
||||
ooyala_attrs = extract_attributes(ooyala_el) or {}
|
||||
if ooyala_attrs.get('data-token-required') == 'true':
|
||||
token_fetch_url = (self._parse_json(ooyala_attrs.get(
|
||||
'data-token-fetch-options', '{}'),
|
||||
video_id, fatal=False) or {}).get('url')
|
||||
if token_fetch_url:
|
||||
embed_token = self._download_json(urljoin(
|
||||
url, token_fetch_url), video_id, fatal=False)
|
||||
if embed_token:
|
||||
video_url = smuggle_url(
|
||||
video_url, {'embed_token': embed_token})
|
||||
elif provider == 'brightcove':
|
||||
video_id = sdc['data-video-id']
|
||||
account_id = sdc.get('data-account-id') or '6058004172001'
|
||||
player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
|
||||
video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
|
||||
ie_key = 'BrightcoveNew'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ie_key': ie_key,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._process_ooyala_element(webpage, self._search_regex(
|
||||
self._SDC_EL_REGEX, webpage, 'sdc element'), url)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': strip_or_none(self._og_search_description(webpage)),
|
||||
'ie_key': 'Ooyala',
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class SkySportsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
|
||||
@@ -62,15 +87,45 @@ class SkySportsIE(SkyBaseIE):
|
||||
|
||||
|
||||
class SkyNewsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:news'
|
||||
_VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
|
||||
'md5': 'd6327e581473cea9976a3236ded370cd',
|
||||
'md5': '411e8893fd216c75eaf7e4c65d364115',
|
||||
'info_dict': {
|
||||
'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
|
||||
'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Russian plane inspected after deadly fire',
|
||||
'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
|
||||
'uploader_id': '6058004172001',
|
||||
'timestamp': 1567112345,
|
||||
'upload_date': '20190829',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
|
||||
|
||||
class SkySportsNewsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:sports:news'
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass',
|
||||
'info_dict': {
|
||||
'id': '10871916',
|
||||
'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass',
|
||||
'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
|
||||
entries = []
|
||||
for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
|
||||
entries.append(self._process_ooyala_element(webpage, sdc_el, url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, article_id, self._og_search_title(webpage),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
@@ -1,40 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SonyLIVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
||||
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
|
||||
'info_dict': {
|
||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
||||
'id': 'ref:5024612095001',
|
||||
'title': 'Bachelors Delight - Achaari Cheese Toast',
|
||||
'id': '1000022678',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170923',
|
||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
||||
'uploader_id': '5182475815001',
|
||||
'timestamp': 1506200547,
|
||||
'upload_date': '20200411',
|
||||
'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
|
||||
'timestamp': 1586632091,
|
||||
'duration': 185,
|
||||
'season_number': 1,
|
||||
'episode': 'Achaari Cheese Toast',
|
||||
'episode_number': 1,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
||||
'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_TOKEN = None
|
||||
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
|
||||
def _call_api(self, version, path, video_id):
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['security_token'] = self._TOKEN
|
||||
try:
|
||||
return self._download_json(
|
||||
'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
|
||||
video_id, headers=headers)['resultObj']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
if message == 'Geoblocked Country':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(message)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
||||
'geo_countries': ['IN'],
|
||||
'referrer': url,
|
||||
}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
video_id = self._match_id(url)
|
||||
content = self._call_api(
|
||||
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
|
||||
if content.get('isEncrypted'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
dash_url = content['videoURL']
|
||||
headers = {
|
||||
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
|
||||
}
|
||||
formats = self._extract_mpd_formats(
|
||||
dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
|
||||
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._call_api(
|
||||
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
|
||||
title = metadata['title']
|
||||
episode = metadata.get('episodeTitle')
|
||||
if episode and title != episode:
|
||||
title += ' - ' + episode
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('posterURL'),
|
||||
'description': metadata.get('longDescription') or metadata.get('shortDescription'),
|
||||
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'season_number': int_or_none(metadata.get('season')),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'release_year': int_or_none(metadata.get('year')),
|
||||
}
|
||||
|
@@ -7,17 +7,24 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?spankbang\.com/
|
||||
(?:
|
||||
(?P<id>[\da-z]+)/(?:video|play|embed)\b|
|
||||
[\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
@@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2y3td/embed/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('id_2')
|
||||
webpage = self._download_webpage(
|
||||
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||
video_id, headers={'Cookie': 'country=US'})
|
||||
@@ -155,30 +166,33 @@ class SpankBangIE(InfoExtractor):
|
||||
|
||||
|
||||
class SpankBangPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||
'info_dict': {
|
||||
'id': 'ug0k',
|
||||
'title': 'Big Ass Titties',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_mincount': 40,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||
|
||||
entries = [self.url_result(
|
||||
'https://spankbang.com/%s/video' % video_id,
|
||||
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||
urljoin(url, mobj.group('path')),
|
||||
ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
|
||||
% re.escape(display_id), webpage)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||
r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
|
||||
fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
@@ -40,9 +40,15 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
'arcEp': 'paramountnetwork.com',
|
||||
'mgid': uri,
|
||||
}
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
root_data = self._parse_json(self._search_regex(
|
||||
r'window\.__DATA__\s*=\s*({.+})',
|
||||
|
156
youtube_dl/extractor/spotify.py
Normal file
156
youtube_dl/extractor/spotify.py
Normal file
@@ -0,0 +1,156 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyBaseIE(InfoExtractor):
    """Common helpers for the Spotify episode and show extractors.

    Holds the access token, the persisted-query hashes used by the
    partner API, and the code that turns an episode JSON object into an
    info dict.
    """

    # Bearer token for the partner API; filled in by _real_initialize()
    _ACCESS_TOKEN = None
    # sha256 hashes of the persisted queries, keyed by operation name
    _OPERATION_HASHES = {
        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
    }
    # Subclasses substitute the URL kind ('episode', 'show') for %s
    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'

    def _real_initialize(self):
        """Download an access token and remember it on the instance."""
        token_data = self._download_json(
            'https://open.spotify.com/get_access_token', None)
        self._ACCESS_TOKEN = token_data['accessToken']

    def _call_api(self, operation, video_id, variables):
        """Run the persisted query `operation` and return its 'data' member.

        `variables` is JSON-encoded into the query string; the hash for
        `operation` is looked up in _OPERATION_HASHES (KeyError if unknown).
        """
        api_query = {
            'operationName': 'query' + operation,
            'variables': json.dumps(variables),
            'extensions': json.dumps({
                'persistedQuery': {
                    'sha256Hash': self._OPERATION_HASHES[operation],
                },
            })
        }
        auth_headers = {'authorization': 'Bearer ' + self._ACCESS_TOKEN}
        response = self._download_json(
            'https://api-partner.spotify.com/pathfinder/v1/query',
            video_id, query=api_query, headers=auth_headers)
        return response['data']

    def _extract_episode(self, episode, series):
        """Build an info dict from an episode object of the partner API.

        `episode` must provide 'id' and 'name'; everything else is
        optional. `series` is stored unchanged under the 'series' key.
        """
        episode_id = episode['id']
        title = episode['name'].strip()

        formats = []

        # Format derived from the audio preview entry, if it has a URL
        preview = episode.get('audioPreview') or {}
        preview_url = preview.get('url')
        if preview_url:
            preview_fmt = {
                'url': preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
                'vcodec': 'none',
            }
            format_name = preview.get('format')
            if format_name:
                preview_fmt['format_id'] = format_name
                # Three-character codec tag, optional upper-case word, then a number
                name_match = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', format_name)
                if name_match:
                    codec_tag, bitrate = name_match.groups()
                    preview_fmt.update({
                        'abr': int(bitrate),
                        'ext': codec_tag.lower(),
                    })
            formats.append(preview_fmt)

        # Only audio items flagged as externally hosted are added
        audio_items = try_get(episode, lambda x: x['audio']['items']) or []
        for audio_item in audio_items:
            hosted_url = audio_item.get('url')
            if hosted_url and audio_item.get('externallyHosted'):
                formats.append({
                    'url': clean_podcast_url(hosted_url),
                    'vcodec': 'none',
                })

        cover_sources = try_get(episode, lambda x: x['coverArt']['sources']) or []
        thumbnails = [{
            'url': cover['url'],
            'width': int_or_none(cover.get('width')),
            'height': int_or_none(cover.get('height')),
        } for cover in cover_sources if cover.get('url')]

        description = strip_or_none(episode.get('description'))
        # totalMilliseconds is passed with a scale of 1000
        duration = float_or_none(
            try_get(episode, lambda x: x['duration']['totalMilliseconds']),
            1000)
        release_date = unified_strdate(
            try_get(episode, lambda x: x['releaseDate']['isoString']))

        return {
            'id': episode_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': description,
            'duration': duration,
            'release_date': release_date,
            'series': series,
        }
|
||||
|
||||
|
||||
class SpotifyIE(SpotifyBaseIE):
    """Extractor for single episode URLs on open.spotify.com."""

    IE_NAME = 'spotify'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
    _TEST = {
        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
        'info_dict': {
            'id': '4Z7GAJ50bgctf6uclHlWKo',
            'ext': 'mp3',
            'title': 'From the archive: Why time management is ruining our lives',
            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
            'duration': 2083.605,
            'release_date': '20201217',
            'series': "The Guardian's Audio Long Reads",
        }
    }

    def _real_extract(self, url):
        """Query the 'Episode' operation and convert its result to an info dict."""
        episode_id = self._match_id(url)
        api_variables = {
            'uri': 'spotify:episode:' + episode_id
        }
        episode = self._call_api('Episode', episode_id, api_variables)['episode']
        # The series name is taken from the episode's 'podcast' object when present
        series_name = try_get(episode, lambda x: x['podcast']['name'])
        return self._extract_episode(episode, series_name)
|
||||
|
||||
|
||||
class SpotifyShowIE(SpotifyBaseIE):
    """Extractor for show URLs on open.spotify.com; yields a playlist of episodes."""

    IE_NAME = 'spotify:show'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
    _TEST = {
        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
        'info_dict': {
            'id': '4PM9Ke6l66IRNpottHKV9M',
            'title': 'The Story from the Guardian',
            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
        },
        'playlist_mincount': 36,
    }

    def _real_extract(self, url):
        """Request the show's episodes in a single call and wrap them in a playlist."""
        show_id = self._match_id(url)
        api_variables = {
            # Very large limit so that one request asks for every episode
            'limit': 1000000000,
            'offset': 0,
            'uri': 'spotify:show:' + show_id,
        }
        podcast = self._call_api('ShowEpisodes', show_id, api_variables)['podcast']
        podcast_name = podcast.get('name')

        episode_items = try_get(podcast, lambda x: x['episodes']['items']) or []
        # Items without an 'episode' member are skipped
        episodes = (item.get('episode') for item in episode_items)
        entries = [
            self._extract_episode(episode, podcast_name)
            for episode in episodes if episode]

        return self.playlist_result(
            entries, show_id, podcast_name, podcast.get('description'))
|
@@ -3,50 +3,62 @@ from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'md5': '74bf14128578d1e040c3ebc82088f45f',
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
|
||||
'info_dict': {
|
||||
'id': '9dexnwtmh8_X',
|
||||
'id': 'bm0foJFaTKqb',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Cowboy Adventure',
|
||||
'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
|
||||
'timestamp': 1437758640,
|
||||
'upload_date': '20150724',
|
||||
'uploader': 'NBCU-SPROUT-NEW',
|
||||
}
|
||||
}
|
||||
'title': 'Robot Bike Race',
|
||||
'description': 'md5:436b1d97117cc437f54c383f4debc66d',
|
||||
'timestamp': 1606148940,
|
||||
'upload_date': '20201123',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.universalkids.com/watch/robot-bike-race',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_component = self._search_regex(
|
||||
r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
|
||||
webpage, 'video component', default=None)
|
||||
if video_component:
|
||||
options = self._parse_json(extract_attributes(
|
||||
video_component)['data-options'], video_id)
|
||||
theplatform_url = options['video']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if options.get('protected'):
|
||||
query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
theplatform_url, query), {'force_smil_url': True})
|
||||
else:
|
||||
iframe = self._search_regex(
|
||||
r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
|
||||
webpage, 'iframe')
|
||||
theplatform_url = extract_attributes(iframe)['src']
|
||||
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
display_id = self._match_id(url)
|
||||
mpx_metadata = self._download_json(
|
||||
# http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
|
||||
'https://www.universalkids.com/_api/videos/' + display_id,
|
||||
display_id)['mpxMetadata']
|
||||
media_pid = mpx_metadata['mediaPid']
|
||||
theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if mpx_metadata.get('entitlement') == 'auth':
|
||||
query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(
|
||||
update_url_query(theplatform_url, query), {
|
||||
'force_smil_url': True,
|
||||
'geo_countries': self._GEO_COUNTRIES,
|
||||
})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_pid,
|
||||
'url': theplatform_url,
|
||||
'series': mpx_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
|
||||
'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@@ -1,28 +1,74 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
clean_podcast_url,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StitcherIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
|
||||
class StitcherBaseIE(InfoExtractor):
    """Shared API access and metadata helpers for the Stitcher extractors."""

    # URL prefix common to subclasses; both /podcast/ and /show/ are accepted
    _VALID_URL_BASE = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/'

    def _call_api(self, path, video_id, query):
        """Fetch `path` from the Stitcher API and return the 'data' member.

        Raises ExtractorError (expected=True) with the first error message
        when the response carries one.
        """
        response = self._download_json(
            'https://api.prod.stitcher.com/' + path,
            video_id, query=query)
        error_message = try_get(response, lambda x: x['errors'][0]['message'])
        if not error_message:
            return response['data']
        raise ExtractorError(error_message, expected=True)

    def _extract_description(self, data):
        """Return the cleaned description, preferring 'html_description'."""
        raw_description = data.get('html_description') or data.get('description')
        return clean_html(raw_description)

    def _extract_audio_url(self, episode):
        """Return the episode's 'audio_url', or its 'guid', passed through url_or_none."""
        candidate = episode.get('audio_url') or episode.get('guid')
        return url_or_none(candidate)

    def _extract_show_info(self, show):
        """Return the show-level fields that are merged into each episode."""
        thumbnail = show.get('image_base_url')
        series = show.get('title')
        return {
            'thumbnail': thumbnail,
            'series': series,
        }

    def _extract_episode(self, episode, audio_url, show_info):
        """Build an info dict for `episode`, then overlay `show_info` on it.

        `episode` must provide 'id' and 'title'. Keys present in
        `show_info` override the episode-level ones.
        """
        episode_info = {
            'id': compat_str(episode['id']),
            'display_id': episode.get('slug'),
            'title': episode['title'].strip(),
            'description': self._extract_description(episode),
            'duration': int_or_none(episode.get('duration')),
            'url': clean_podcast_url(audio_url),
            'vcodec': 'none',
            'timestamp': int_or_none(episode.get('date_published')),
            'season_number': int_or_none(episode.get('season')),
            'season_id': str_or_none(episode.get('season_id')),
        }
        episode_info.update(show_info)
        return episode_info
|
||||
|
||||
|
||||
class StitcherIE(StitcherBaseIE):
|
||||
_VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?:[^/]+/)+e(?:pisode)?/(?:[^/#?&]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
|
||||
'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
|
||||
'md5': 'e9635098e0da10b21a0e2b85585530f6',
|
||||
'info_dict': {
|
||||
'id': '40789481',
|
||||
'ext': 'mp3',
|
||||
'title': 'Machine Learning Mastery and Cancer Clusters',
|
||||
'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3',
|
||||
'description': 'md5:547adb4081864be114ae3831b4c2b42f',
|
||||
'duration': 1604,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20151008',
|
||||
'timestamp': 1444285800,
|
||||
'series': 'Talking Machines',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
|
||||
@@ -38,6 +84,7 @@ class StitcherIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Page Not Found',
|
||||
}, {
|
||||
# escaped title
|
||||
'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
|
||||
@@ -45,37 +92,53 @@ class StitcherIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
audio_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or audio_id
|
||||
audio_id = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'shows/episodes', audio_id, {'episode_ids': audio_id})
|
||||
episode = data['episodes'][0]
|
||||
audio_url = self._extract_audio_url(episode)
|
||||
if not audio_url:
|
||||
self.raise_login_required()
|
||||
show = try_get(data, lambda x: x['shows'][0], dict) or {}
|
||||
return self._extract_episode(
|
||||
episode, audio_url, self._extract_show_info(show))
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
episode = self._parse_json(
|
||||
js_to_json(self._search_regex(
|
||||
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
|
||||
display_id)['config']['episode']
|
||||
class StitcherShowIE(StitcherBaseIE):
|
||||
_VALID_URL = StitcherBaseIE._VALID_URL_BASE + r'(?P<id>[^/#?&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines',
|
||||
'info_dict': {
|
||||
'id': 'the-talking-machines',
|
||||
'title': 'Talking Machines',
|
||||
'description': 'md5:831f0995e40f26c10231af39cf1ebf0b',
|
||||
},
|
||||
'playlist_mincount': 106,
|
||||
}, {
|
||||
'url': 'https://www.stitcher.com/show/the-talking-machines',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
title = unescapeHTML(episode['title'])
|
||||
formats = [{
|
||||
'url': episode[episode_key],
|
||||
'ext': determine_ext(episode[episode_key]) or 'mp3',
|
||||
'vcodec': 'none',
|
||||
} for episode_key in ('episodeURL',) if episode.get(episode_key)]
|
||||
description = self._search_regex(
|
||||
r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False)
|
||||
duration = int_or_none(episode.get('duration'))
|
||||
thumbnail = episode.get('episodeImage')
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
data = self._call_api(
|
||||
'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000})
|
||||
show = try_get(data, lambda x: x['shows'][0], dict) or {}
|
||||
show_info = self._extract_show_info(show)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
entries = []
|
||||
for episode in (data.get('episodes') or []):
|
||||
audio_url = self._extract_audio_url(episode)
|
||||
if not audio_url:
|
||||
continue
|
||||
entries.append(self._extract_episode(episode, audio_url, show_info))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_slug, show.get('title'),
|
||||
self._extract_description(show))
|
||||
|
@@ -2,25 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class StreetVoiceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://streetvoice.com/skippylu/songs/94440/',
|
||||
'md5': '15974627fc01a29e492c98593c2fd472',
|
||||
'url': 'https://streetvoice.com/skippylu/songs/123688/',
|
||||
'md5': '0eb535970629a5195685355f3ed60bfd',
|
||||
'info_dict': {
|
||||
'id': '94440',
|
||||
'id': '123688',
|
||||
'ext': 'mp3',
|
||||
'title': '輸',
|
||||
'description': 'Crispy脆樂團 - 輸',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 260,
|
||||
'upload_date': '20091018',
|
||||
'title': '流浪',
|
||||
'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 270,
|
||||
'upload_date': '20100923',
|
||||
'uploader': 'Crispy脆樂團',
|
||||
'uploader_id': '627810',
|
||||
'uploader_url': 're:^https?://streetvoice.com/skippylu/',
|
||||
'timestamp': 1285261661,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'track': '流浪',
|
||||
'track_id': '123688',
|
||||
'album': '2010',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
||||
@@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
song = self._download_json(
|
||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
||||
|
||||
base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
|
||||
song = self._download_json(base_url, song_id, query={
|
||||
'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
|
||||
})
|
||||
title = song['name']
|
||||
author = song['user']['nickname']
|
||||
|
||||
formats = []
|
||||
for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
|
||||
f_url = (self._download_json(
|
||||
base_url + suffix + '/', song_id,
|
||||
'Downloading %s format URL' % format_id,
|
||||
data=b'', fatal=False) or {}).get('file')
|
||||
if not f_url:
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp3',
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if format_id == 'hls':
|
||||
f['protocol'] = 'm3u8_native'
|
||||
abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
|
||||
if abr:
|
||||
abr = int(abr)
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
user = song.get('user') or {}
|
||||
username = user.get('username')
|
||||
get_count = lambda x: int_or_none(song.get(x + '_count'))
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song['file'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': '%s - %s' % (author, title),
|
||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
||||
'duration': song.get('length'),
|
||||
'upload_date': unified_strdate(song.get('created_at')),
|
||||
'uploader': author,
|
||||
'uploader_id': compat_str(song['user']['id']),
|
||||
'description': strip_or_none(song.get('synopsis')),
|
||||
'thumbnail': song.get('image'),
|
||||
'duration': int_or_none(song.get('length')),
|
||||
'timestamp': parse_iso8601(song.get('created_at')),
|
||||
'uploader': try_get(user, lambda x: x['profile']['nickname']),
|
||||
'uploader_id': str_or_none(user.get('id')),
|
||||
'uploader_url': urljoin(url, '/%s/' % username) if username else None,
|
||||
'view_count': get_count('plays'),
|
||||
'like_count': get_count('likes'),
|
||||
'comment_count': get_count('comments'),
|
||||
'repost_count': get_count('share'),
|
||||
'track': title,
|
||||
'track_id': song_id,
|
||||
'album': try_get(song, lambda x: x['album']['name']),
|
||||
}
|
||||
|
@@ -8,13 +8,17 @@ from ..utils import (
|
||||
compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class STVPlayerIE(InfoExtractor):
|
||||
IE_NAME = 'stv:player'
|
||||
_VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# shortform
|
||||
'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
|
||||
'md5': '5adf9439c31d554f8be0707c7abe7e0a',
|
||||
'info_dict': {
|
||||
@@ -27,7 +31,11 @@ class STVPlayerIE(InfoExtractor):
|
||||
'uploader_id': '1486976045',
|
||||
},
|
||||
'skip': 'this resource is unavailable outside of the UK',
|
||||
}
|
||||
}, {
|
||||
# episodes
|
||||
'url': 'https://player.stv.tv/episode/4125/jennifer-saunders-memory-lane',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
|
||||
_PTYPE_MAP = {
|
||||
'episode': 'episodes',
|
||||
@@ -36,11 +44,31 @@ class STVPlayerIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
ptype, video_id = re.match(self._VALID_URL, url).groups()
|
||||
resp = self._download_json(
|
||||
'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
|
||||
video_id)
|
||||
|
||||
result = resp['results']
|
||||
webpage = self._download_webpage(url, video_id, fatal=False) or ''
|
||||
props = (self._parse_json(self._search_regex(
|
||||
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||
webpage, 'next data', default='{}'), video_id,
|
||||
fatal=False) or {}).get('props') or {}
|
||||
player_api_cache = try_get(
|
||||
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
|
||||
|
||||
api_path, resp = None, {}
|
||||
for k, v in player_api_cache.items():
|
||||
if k.startswith('/episodes/') or k.startswith('/shortform/'):
|
||||
api_path, resp = k, v
|
||||
break
|
||||
else:
|
||||
episode_id = str_or_none(try_get(
|
||||
props, lambda x: x['pageProps']['episodeId']))
|
||||
api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id)
|
||||
|
||||
result = resp.get('results')
|
||||
if not result:
|
||||
resp = self._download_json(
|
||||
'https://player.api.stv.tv/v1' + api_path, video_id)
|
||||
result = resp['results']
|
||||
|
||||
video = result['video']
|
||||
video_id = compat_str(video['id'])
|
||||
|
||||
@@ -57,7 +85,7 @@ class STVPlayerIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id,
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['GB']}),
|
||||
'description': result.get('summary'),
|
||||
'duration': float_or_none(video.get('length'), 1000),
|
||||
'subtitles': subtitles,
|
||||
|
@@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE):
|
||||
@staticmethod
|
||||
def _is_teachable(webpage):
|
||||
return 'teachableTracker.linker:autoLink' in webpage and re.search(
|
||||
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
|
||||
r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
|
||||
webpage)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -5,14 +5,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'md5': '7ee56d665cfd241c0e6d80fd175068b0',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
@@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor):
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
|
||||
'info_dict': {
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||
'md5': 'eddb50291df704ce23c74821b995bcac',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
@@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor):
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
if content.get('dataCmsId') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
|
||||
config_url = urljoin(url, content['dataConfig'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
content['dataConfig'], video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
|
||||
def mmc_url(mmc_type):
|
||||
return re.sub(
|
||||
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
|
||||
config['services']['mmc'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for mmc_type in ('flash', 'html5'):
|
||||
mmc = self._download_json(
|
||||
mmc_url(mmc_type), video_id,
|
||||
'Downloading %s mmc JSON' % mmc_type, fatal=False)
|
||||
if not mmc:
|
||||
continue
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
}, fatal=False) or {}
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
services = config['services']
|
||||
caronte = self._download_json(services['caronte'], video_id)
|
||||
stream = caronte['dls'][0]['stream']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
'Origin': re.match(r'https?://[^/]+', url).group(0),
|
||||
})
|
||||
cdn = self._download_json(
|
||||
caronte['cerbero'], video_id, data=json.dumps({
|
||||
'bbx': caronte['bbx'],
|
||||
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
|
||||
}).encode(), headers=headers)['tokens']['1']['cdn']
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
'duration': int_or_none(content.get('dataDuration')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
# smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,14 +25,16 @@ class TenPlayIE(InfoExtractor):
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# 'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
_GEO_BYPASS = False
|
||||
_FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
@@ -40,19 +43,28 @@ class TenPlayIE(InfoExtractor):
|
||||
video = data.get('video') or {}
|
||||
metadata = data.get('metaData') or {}
|
||||
brightcove_id = video.get('videoId') or metadata['showContentVideoId']
|
||||
brightcove_url = smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['AU']})
|
||||
# brightcove_url = smuggle_url(
|
||||
# self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
# {'geo_countries': ['AU']})
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': brightcove_url,
|
||||
'id': content_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
|
||||
# '_type': 'url_transparent',
|
||||
# 'url': brightcove_url,
|
||||
'formats': formats,
|
||||
'id': brightcove_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
|
||||
'description': video.get('description'),
|
||||
'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
|
||||
'series': metadata.get('showName'),
|
||||
'season': metadata.get('showContentSeason'),
|
||||
'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'thumbnail': video.get('poster'),
|
||||
'uploader_id': '2199827728001',
|
||||
# 'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
provider_id = mobj.group('provider_id')
|
||||
|
@@ -1,18 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TheWeatherChannelIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
|
||||
'md5': 'ab924ac9574e79689c24c6b95e957def',
|
||||
'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
|
||||
'info_dict': {
|
||||
'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
|
||||
'ext': 'mp4',
|
||||
@@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
|
||||
'uploader': 'TWC - Digital (No Distro)',
|
||||
'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
|
||||
'upload_date': '20160720',
|
||||
'timestamp': 1469018835,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
video_id = drupal_settings['twc']['contexts']['node']['uuid']
|
||||
video_data = self._download_json(
|
||||
'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
|
||||
asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not locale:
|
||||
locale = 'en-US'
|
||||
video_data = list(self._download_json(
|
||||
'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
|
||||
'name': 'getCMSAssetsUrlConfig',
|
||||
'params': {
|
||||
'language': locale.replace('-', '_'),
|
||||
'query': {
|
||||
'assetName': {
|
||||
'$in': asset_name,
|
||||
},
|
||||
},
|
||||
}
|
||||
}]).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
|
||||
video_id = video_data['id']
|
||||
seo_meta = video_data.get('seometa', {})
|
||||
title = video_data.get('title') or seo_meta['title']
|
||||
|
||||
@@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
cc_url = video_data.get('cc_url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'uploader': video_data.get('providername'),
|
||||
'uploader_id': video_data.get('providerid'),
|
||||
'timestamp': parse_iso8601(video_data.get('publishdate')),
|
||||
'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -3,10 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -15,29 +18,35 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
IE_DESC = '3Q SDN'
|
||||
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
|
||||
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
|
||||
'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
|
||||
# https://player.3qsdn.com/demo.html
|
||||
'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
|
||||
'md5': '64a57396b16fa011b15e0ea60edce918',
|
||||
'info_dict': {
|
||||
'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
||||
'id': '7201c779-6b3c-11e7-a40e-002590c750be',
|
||||
'ext': 'mp4',
|
||||
'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
|
||||
'title': 'Video Ads',
|
||||
'is_live': False,
|
||||
'description': 'Video Ads Demo',
|
||||
'timestamp': 1500334803,
|
||||
'upload_date': '20170717',
|
||||
'duration': 888.032,
|
||||
'subtitles': {
|
||||
'eng': 'count:1',
|
||||
},
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
|
||||
'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
|
||||
}, {
|
||||
# live video stream
|
||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||
'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be',
|
||||
'info_dict': {
|
||||
'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
|
||||
'id': '66e68995-11ca-11e8-9273-002590c750be',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
}, {
|
||||
# live audio stream
|
||||
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
|
||||
@@ -58,6 +67,14 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
# live video with rtmp link
|
||||
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ondemand from http://www.philharmonie.tv/veranstaltung/26/
|
||||
'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# live video stream
|
||||
'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -70,73 +87,78 @@ class ThreeQSDNIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
js = self._download_webpage(
|
||||
'http://playout.3qsdn.com/%s' % video_id, video_id,
|
||||
query={'js': 'true'})
|
||||
try:
|
||||
config = self._download_json(
|
||||
url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
self.raise_geo_restricted()
|
||||
raise
|
||||
|
||||
if any(p in js for p in (
|
||||
'>This content is not available in your country',
|
||||
'playout.3qsdn.com/forbidden')):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
stream_content = self._search_regex(
|
||||
r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
|
||||
'stream content', default='demand', group='content')
|
||||
|
||||
live = stream_content == 'live'
|
||||
|
||||
stream_type = self._search_regex(
|
||||
r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
|
||||
'stream type', default='video', group='type')
|
||||
live = config.get('streamContent') == 'live'
|
||||
aspect = float_or_none(config.get('aspect'))
|
||||
|
||||
formats = []
|
||||
urls = set()
|
||||
|
||||
def extract_formats(item_url, item={}):
|
||||
if not item_url or item_url in urls:
|
||||
return
|
||||
urls.add(item_url)
|
||||
ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None)
|
||||
if ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
item_url, video_id, mpd_id='mpd', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
item_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8' if live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
item_url, video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
if not self._is_valid_url(item_url, video_id):
|
||||
return
|
||||
formats.append({
|
||||
'url': item_url,
|
||||
'format_id': item.get('quality'),
|
||||
'ext': 'mp4' if item_url.startswith('rtsp') else ext,
|
||||
'vcodec': 'none' if stream_type == 'audio' else None,
|
||||
})
|
||||
|
||||
for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
|
||||
f = self._parse_json(
|
||||
item_js, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not f:
|
||||
for source_type, source in (config.get('sources') or {}).items():
|
||||
if not source:
|
||||
continue
|
||||
extract_formats(f.get('src'), f)
|
||||
if source_type == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
source, video_id, mpd_id='mpd', fatal=False))
|
||||
elif source_type == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif source_type == 'progressive':
|
||||
for s in source:
|
||||
src = s.get('src')
|
||||
if not (src and self._is_valid_url(src, video_id)):
|
||||
continue
|
||||
width = None
|
||||
format_id = ['http']
|
||||
ext = determine_ext(src)
|
||||
if ext:
|
||||
format_id.append(ext)
|
||||
height = int_or_none(s.get('height'))
|
||||
if height:
|
||||
format_id.append('%dp' % height)
|
||||
if aspect:
|
||||
width = int(height * aspect)
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'format_id': '-'.join(format_id),
|
||||
'height': height,
|
||||
'source_preference': 0,
|
||||
'url': src,
|
||||
'vcodec': 'none' if height == 0 else None,
|
||||
'width': width,
|
||||
})
|
||||
for f in formats:
|
||||
if f.get('acodec') == 'none':
|
||||
f['preference'] = -40
|
||||
elif f.get('vcodec') == 'none':
|
||||
f['preference'] = -50
|
||||
self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id'))
|
||||
|
||||
# More relaxed version to collect additional URLs and acting
|
||||
# as a future-proof fallback
|
||||
for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
|
||||
extract_formats(src)
|
||||
subtitles = {}
|
||||
for subtitle in (config.get('subtitles') or []):
|
||||
src = subtitle.get('src')
|
||||
if not src:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('label') or 'eng', []).append({
|
||||
'url': src,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._live_title(video_id) if live else video_id
|
||||
title = config.get('title') or video_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if live else title,
|
||||
'thumbnail': config.get('poster') or None,
|
||||
'description': config.get('description') or None,
|
||||
'timestamp': parse_iso8601(config.get('upload_date')),
|
||||
'duration': float_or_none(config.get('vlength')) or None,
|
||||
'is_live': live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor):
|
||||
|
||||
class MeWatchIE(InfoExtractor):
|
||||
IE_NAME = 'mewatch'
|
||||
_VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
|
||||
'info_dict': {
|
||||
@@ -214,6 +214,15 @@ class MeWatchIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -9,7 +9,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_kwargs,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
@@ -18,6 +17,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -42,30 +42,16 @@ class TwitchBaseIE(InfoExtractor):
|
||||
_CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
|
||||
_NETRC_MACHINE = 'twitch'
|
||||
|
||||
def _handle_error(self, response):
|
||||
if not isinstance(response, dict):
|
||||
return
|
||||
error = response.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, path, item_id, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {}).copy()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
})
|
||||
kwargs.update({
|
||||
'headers': headers,
|
||||
'expected_status': (400, 410),
|
||||
})
|
||||
response = self._download_json(
|
||||
'%s/%s' % (self._API_BASE, path), item_id,
|
||||
*args, **compat_kwargs(kwargs))
|
||||
self._handle_error(response)
|
||||
return response
|
||||
_OPERATION_HASHES = {
|
||||
'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
|
||||
'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
|
||||
'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
|
||||
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
|
||||
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
|
||||
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
|
||||
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
|
||||
'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
|
||||
}
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
@@ -91,14 +77,14 @@ class TwitchBaseIE(InfoExtractor):
|
||||
|
||||
headers = {
|
||||
'Referer': page_url,
|
||||
'Origin': page_url,
|
||||
'Origin': 'https://www.twitch.tv',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
}
|
||||
|
||||
response = self._download_json(
|
||||
post_url, None, note, data=json.dumps(form).encode(),
|
||||
headers=headers, expected_status=400)
|
||||
error = response.get('error_description') or response.get('error_code')
|
||||
error = dict_get(response, ('error', 'error_description', 'error_code'))
|
||||
if error:
|
||||
fail(error)
|
||||
|
||||
@@ -151,13 +137,50 @@ class TwitchBaseIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
def _download_access_token(self, channel_name):
|
||||
return self._call_api(
|
||||
'api/channels/%s/access_token' % channel_name, channel_name,
|
||||
'Downloading access token JSON')
|
||||
def _download_base_gql(self, video_id, ops, note, fatal=True):
|
||||
headers = {
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
}
|
||||
gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
|
||||
if gql_auth:
|
||||
headers['Authorization'] = 'OAuth ' + gql_auth.value
|
||||
return self._download_json(
|
||||
'https://gql.twitch.tv/gql', video_id, note,
|
||||
data=json.dumps(ops).encode(),
|
||||
headers=headers, fatal=fatal)
|
||||
|
||||
def _extract_channel_id(self, token, channel_name):
|
||||
return compat_str(self._parse_json(token, channel_name)['channel_id'])
|
||||
def _download_gql(self, video_id, ops, note, fatal=True):
|
||||
for op in ops:
|
||||
op['extensions'] = {
|
||||
'persistedQuery': {
|
||||
'version': 1,
|
||||
'sha256Hash': self._OPERATION_HASHES[op['operationName']],
|
||||
}
|
||||
}
|
||||
return self._download_base_gql(video_id, ops, note)
|
||||
|
||||
def _download_access_token(self, video_id, token_kind, param_name):
|
||||
method = '%sPlaybackAccessToken' % token_kind
|
||||
ops = {
|
||||
'query': '''{
|
||||
%s(
|
||||
%s: "%s",
|
||||
params: {
|
||||
platform: "web",
|
||||
playerBackend: "mediaplayer",
|
||||
playerType: "site"
|
||||
}
|
||||
)
|
||||
{
|
||||
value
|
||||
signature
|
||||
}
|
||||
}''' % (method, param_name, video_id),
|
||||
}
|
||||
return self._download_base_gql(
|
||||
video_id, ops,
|
||||
'Downloading %s access token GraphQL' % token_kind)['data'][method]
|
||||
|
||||
|
||||
class TwitchVodIE(TwitchBaseIE):
|
||||
@@ -170,8 +193,6 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_ITEM_TYPE = 'vod'
|
||||
_ITEM_SHORTCUT = 'v'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
|
||||
@@ -181,7 +202,7 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
'title': 'LCK Summer Split - Week 6 Day 1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 17208,
|
||||
'timestamp': 1435131709,
|
||||
'timestamp': 1435131734,
|
||||
'upload_date': '20150624',
|
||||
'uploader': 'Riot Games',
|
||||
'uploader_id': 'riotgames',
|
||||
@@ -230,10 +251,20 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
}]
|
||||
|
||||
def _download_info(self, item_id):
|
||||
return self._extract_info(
|
||||
self._call_api(
|
||||
'kraken/videos/%s' % item_id, item_id,
|
||||
'Downloading video info JSON'))
|
||||
data = self._download_gql(
|
||||
item_id, [{
|
||||
'operationName': 'VideoMetadata',
|
||||
'variables': {
|
||||
'channelLogin': '',
|
||||
'videoID': item_id,
|
||||
},
|
||||
}],
|
||||
'Downloading stream metadata GraphQL')[0]['data']
|
||||
video = data.get('video')
|
||||
if video is None:
|
||||
raise ExtractorError(
|
||||
'Video %s does not exist' % item_id, expected=True)
|
||||
return self._extract_info_gql(video, item_id)
|
||||
|
||||
@staticmethod
|
||||
def _extract_info(info):
|
||||
@@ -272,13 +303,33 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_info_gql(info, item_id):
|
||||
vod_id = info.get('id') or item_id
|
||||
# id backward compatibility for download archives
|
||||
if vod_id[0] != 'v':
|
||||
vod_id = 'v%s' % vod_id
|
||||
thumbnail = url_or_none(info.get('previewThumbnailURL'))
|
||||
if thumbnail:
|
||||
for p in ('width', 'height'):
|
||||
thumbnail = thumbnail.replace('{%s}' % p, '0')
|
||||
return {
|
||||
'id': vod_id,
|
||||
'title': info.get('title') or 'Untitled Broadcast',
|
||||
'description': info.get('description'),
|
||||
'duration': int_or_none(info.get('lengthSeconds')),
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
|
||||
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
|
||||
'timestamp': unified_timestamp(info.get('publishedAt')),
|
||||
'view_count': int_or_none(info.get('viewCount')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
vod_id = self._match_id(url)
|
||||
|
||||
info = self._download_info(vod_id)
|
||||
access_token = self._call_api(
|
||||
'api/vods/%s/access_token' % vod_id, vod_id,
|
||||
'Downloading %s access token' % self._ITEM_TYPE)
|
||||
access_token = self._download_access_token(vod_id, 'video', 'id')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/vod/%s.m3u8?%s' % (
|
||||
@@ -289,8 +340,8 @@ class TwitchVodIE(TwitchBaseIE):
|
||||
'allow_spectre': 'true',
|
||||
'player': 'twitchweb',
|
||||
'playlist_include_framerate': 'true',
|
||||
'nauth': access_token['token'],
|
||||
'nauthsig': access_token['sig'],
|
||||
'nauth': access_token['value'],
|
||||
'nauthsig': access_token['signature'],
|
||||
})),
|
||||
vod_id, 'mp4', entry_protocol='m3u8_native')
|
||||
|
||||
@@ -333,37 +384,7 @@ def _make_video_result(node):
|
||||
}
|
||||
|
||||
|
||||
class TwitchGraphQLBaseIE(TwitchBaseIE):
|
||||
_PAGE_LIMIT = 100
|
||||
|
||||
_OPERATION_HASHES = {
|
||||
'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
|
||||
'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
|
||||
'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
|
||||
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
|
||||
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
|
||||
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
|
||||
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
|
||||
}
|
||||
|
||||
def _download_gql(self, video_id, ops, note, fatal=True):
|
||||
for op in ops:
|
||||
op['extensions'] = {
|
||||
'persistedQuery': {
|
||||
'version': 1,
|
||||
'sha256Hash': self._OPERATION_HASHES[op['operationName']],
|
||||
}
|
||||
}
|
||||
return self._download_json(
|
||||
'https://gql.twitch.tv/gql', video_id, note,
|
||||
data=json.dumps(ops).encode(),
|
||||
headers={
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
}, fatal=fatal)
|
||||
|
||||
|
||||
class TwitchCollectionIE(TwitchGraphQLBaseIE):
|
||||
class TwitchCollectionIE(TwitchBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@@ -400,7 +421,9 @@ class TwitchCollectionIE(TwitchGraphQLBaseIE):
|
||||
entries, playlist_id=collection_id, playlist_title=title)
|
||||
|
||||
|
||||
class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
|
||||
class TwitchPlaylistBaseIE(TwitchBaseIE):
|
||||
_PAGE_LIMIT = 100
|
||||
|
||||
def _entries(self, channel_name, *args):
|
||||
cursor = None
|
||||
variables_common = self._make_variables(channel_name, *args)
|
||||
@@ -440,49 +463,6 @@ class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE):
|
||||
if not cursor or not isinstance(cursor, compat_str):
|
||||
break
|
||||
|
||||
# Deprecated kraken v5 API
|
||||
def _entries_kraken(self, channel_name, broadcast_type, sort):
|
||||
access_token = self._download_access_token(channel_name)
|
||||
channel_id = self._extract_channel_id(access_token['token'], channel_name)
|
||||
offset = 0
|
||||
counter_override = None
|
||||
for counter in itertools.count(1):
|
||||
response = self._call_api(
|
||||
'kraken/channels/%s/videos/' % channel_id,
|
||||
channel_id,
|
||||
'Downloading video JSON page %s' % (counter_override or counter),
|
||||
query={
|
||||
'offset': offset,
|
||||
'limit': self._PAGE_LIMIT,
|
||||
'broadcast_type': broadcast_type,
|
||||
'sort': sort,
|
||||
})
|
||||
videos = response.get('videos')
|
||||
if not isinstance(videos, list):
|
||||
break
|
||||
for video in videos:
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': TwitchVodIE.ie_key(),
|
||||
'id': video.get('_id'),
|
||||
'url': video_url,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'timestamp': unified_timestamp(video.get('published_at')),
|
||||
'duration': float_or_none(video.get('length')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'language': video.get('language'),
|
||||
}
|
||||
offset += self._PAGE_LIMIT
|
||||
total = int_or_none(response.get('_total'))
|
||||
if total and offset >= total:
|
||||
break
|
||||
|
||||
|
||||
class TwitchVideosIE(TwitchPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
|
||||
@@ -724,7 +704,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
|
||||
playlist_title='%s - Collections' % channel_name)
|
||||
|
||||
|
||||
class TwitchStreamIE(TwitchGraphQLBaseIE):
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -814,8 +794,9 @@ class TwitchStreamIE(TwitchGraphQLBaseIE):
|
||||
if not stream:
|
||||
raise ExtractorError('%s is offline' % channel_name, expected=True)
|
||||
|
||||
access_token = self._download_access_token(channel_name)
|
||||
token = access_token['token']
|
||||
access_token = self._download_access_token(
|
||||
channel_name, 'stream', 'channelName')
|
||||
token = access_token['value']
|
||||
|
||||
stream_id = stream.get('id') or channel_name
|
||||
query = {
|
||||
@@ -826,7 +807,7 @@ class TwitchStreamIE(TwitchGraphQLBaseIE):
|
||||
'player': 'twitchweb',
|
||||
'playlist_include_framerate': 'true',
|
||||
'segment_preference': '4',
|
||||
'sig': access_token['sig'].encode('utf-8'),
|
||||
'sig': access_token['signature'].encode('utf-8'),
|
||||
'token': token.encode('utf-8'),
|
||||
}
|
||||
formats = self._extract_m3u8_formats(
|
||||
@@ -912,8 +893,8 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
clip = self._download_json(
|
||||
'https://gql.twitch.tv/gql', video_id, data=json.dumps({
|
||||
clip = self._download_base_gql(
|
||||
video_id, {
|
||||
'query': '''{
|
||||
clip(slug: "%s") {
|
||||
broadcaster {
|
||||
@@ -937,10 +918,7 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
}
|
||||
viewCount
|
||||
}
|
||||
}''' % video_id,
|
||||
}).encode(), headers={
|
||||
'Client-ID': self._CLIENT_ID,
|
||||
})['data']['clip']
|
||||
}''' % video_id}, 'Downloading clip GraphQL')['data']['clip']
|
||||
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
|
@@ -251,10 +251,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'info_dict': {
|
||||
'id': '700207533655363584',
|
||||
'ext': 'mp4',
|
||||
'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel',
|
||||
'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': 'simon vetugo',
|
||||
'uploader': 'simon vertugo',
|
||||
'uploader_id': 'simonvertugo',
|
||||
'duration': 30.0,
|
||||
'timestamp': 1455777459,
|
||||
@@ -312,6 +312,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'timestamp': 1492000653,
|
||||
'upload_date': '20170412',
|
||||
},
|
||||
'skip': 'Account suspended',
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/web/status/910031516746514432',
|
||||
'info_dict': {
|
||||
@@ -372,6 +373,24 @@ class TwitterIE(TwitterBaseIE):
|
||||
'uploader_id': '1eVjYOLGkGrQL',
|
||||
},
|
||||
'add_ie': ['TwitterBroadcast'],
|
||||
}, {
|
||||
# unified card
|
||||
'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
|
||||
'info_dict': {
|
||||
'id': '1349794411333394432',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
|
||||
'uploader': 'Brooklyn Nets',
|
||||
'uploader_id': 'BrooklynNets',
|
||||
'duration': 324.484,
|
||||
'timestamp': 1610651040,
|
||||
'upload_date': '20210114',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Twitch Clip Embed
|
||||
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
||||
@@ -380,6 +399,30 @@ class TwitterIE(TwitterBaseIE):
|
||||
# promo_video_website card
|
||||
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# promo_video_convo card
|
||||
'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# appplayer card
|
||||
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video_direct_message card
|
||||
'url': 'https://twitter.com/qarev001/status/1348948114569269251',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# poll2choice_video card
|
||||
'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# poll3choice_video card
|
||||
'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# poll4choice_video card
|
||||
'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -424,8 +467,7 @@ class TwitterIE(TwitterBaseIE):
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
||||
if media and media.get('type') != 'photo':
|
||||
def extract_from_video_info(media):
|
||||
video_info = media.get('video_info') or {}
|
||||
|
||||
formats = []
|
||||
@@ -452,6 +494,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(video_info.get('duration_millis'), 1000),
|
||||
})
|
||||
|
||||
media = try_get(status, lambda x: x['extended_entities']['media'][0])
|
||||
if media and media.get('type') != 'photo':
|
||||
extract_from_video_info(media)
|
||||
else:
|
||||
card = status.get('card')
|
||||
if card:
|
||||
@@ -462,7 +508,35 @@ class TwitterIE(TwitterBaseIE):
|
||||
return try_get(o, lambda x: x[x['type'].lower() + '_value'])
|
||||
|
||||
card_name = card['name'].split(':')[-1]
|
||||
if card_name in ('amplify', 'promo_video_website'):
|
||||
if card_name == 'player':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('player_url'),
|
||||
})
|
||||
elif card_name == 'periscope_broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('url') or get_binding_value('player_url'),
|
||||
'ie_key': PeriscopeIE.ie_key(),
|
||||
})
|
||||
elif card_name == 'broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('broadcast_url'),
|
||||
'ie_key': TwitterBroadcastIE.ie_key(),
|
||||
})
|
||||
elif card_name == 'summary':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('card_url'),
|
||||
})
|
||||
elif card_name == 'unified_card':
|
||||
media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
|
||||
extract_from_video_info(next(iter(media_entities.values())))
|
||||
# amplify, promo_video_website, promo_video_convo, appplayer,
|
||||
# video_direct_message, poll2choice_video, poll3choice_video,
|
||||
# poll4choice_video, ...
|
||||
else:
|
||||
is_amplify = card_name == 'amplify'
|
||||
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
|
||||
content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
|
||||
@@ -488,25 +562,6 @@ class TwitterIE(TwitterBaseIE):
|
||||
'duration': int_or_none(get_binding_value(
|
||||
'content_duration_seconds')),
|
||||
})
|
||||
elif card_name == 'player':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('player_url'),
|
||||
})
|
||||
elif card_name == 'periscope_broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('url') or get_binding_value('player_url'),
|
||||
'ie_key': PeriscopeIE.ie_key(),
|
||||
})
|
||||
elif card_name == 'broadcast':
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': get_binding_value('broadcast_url'),
|
||||
'ie_key': TwitterBroadcastIE.ie_key(),
|
||||
})
|
||||
else:
|
||||
raise ExtractorError('Unsupported Twitter Card.')
|
||||
else:
|
||||
expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
|
||||
if not expanded_url:
|
||||
|
@@ -5,10 +5,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '2117008346001',
|
||||
'ext': 'mp4',
|
||||
@@ -23,7 +22,11 @@ class UKTVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest']
|
||||
}
|
||||
}, {
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vhx:embed'
|
||||
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
|
||||
return unescapeHTML(mobj.group(1)) if mobj else None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
'ott data'), video_id, js_to_json)['config_url']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
info['id'] = video_id
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
return info
|
||||
|
@@ -12,7 +12,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class VVVVIDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
|
||||
_VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
# video_type == 'video/vvvvid'
|
||||
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
|
||||
@@ -21,6 +22,15 @@ class VVVVIDIE(InfoExtractor):
|
||||
'id': '489048',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ping Pong',
|
||||
'duration': 239,
|
||||
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
|
||||
'season_id': '437',
|
||||
'episode': 'Ping Pong',
|
||||
'episode_number': 1,
|
||||
'episode_id': '3334',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -37,6 +47,9 @@ class VVVVIDIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
||||
'only_matching': True
|
||||
}]
|
||||
_conn_id = None
|
||||
|
||||
@@ -45,20 +58,35 @@ class VVVVIDIE(InfoExtractor):
|
||||
'https://www.vvvvid.it/user/login',
|
||||
None, headers=self.geo_verification_headers())['data']['conn_id']
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
def _download_info(self, show_id, path, video_id, fatal=True):
|
||||
response = self._download_json(
|
||||
'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
|
||||
'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
|
||||
video_id, headers=self.geo_verification_headers(), query={
|
||||
'conn_id': self._conn_id,
|
||||
})
|
||||
if response['result'] == 'error':
|
||||
}, fatal=fatal)
|
||||
if not (response or fatal):
|
||||
return
|
||||
if response.get('result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['message']), expected=True)
|
||||
return response['data']
|
||||
|
||||
def _extract_common_video_info(self, video_data):
|
||||
return {
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'episode_id': str_or_none(video_data.get('id')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
response = self._download_info(
|
||||
show_id, 'season/%s' % season_id, video_id)
|
||||
|
||||
vid = int(video_id)
|
||||
video_data = list(filter(
|
||||
lambda episode: episode.get('video_id') == vid, response['data']))[0]
|
||||
lambda episode: episode.get('video_id') == vid, response))[0]
|
||||
title = video_data['title']
|
||||
formats = []
|
||||
|
||||
# vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
|
||||
@@ -115,6 +143,17 @@ class VVVVIDIE(InfoExtractor):
|
||||
|
||||
return d
|
||||
|
||||
info = {}
|
||||
|
||||
def metadata_from_url(r_url):
|
||||
if not info and r_url:
|
||||
mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
|
||||
if mobj:
|
||||
info['episode_number'] = int(mobj.group(2))
|
||||
season_number = mobj.group(1)
|
||||
if season_number:
|
||||
info['season_number'] = int(season_number)
|
||||
|
||||
for quality in ('_sd', ''):
|
||||
embed_code = video_data.get('embed_info' + quality)
|
||||
if not embed_code:
|
||||
@@ -122,7 +161,6 @@ class VVVVIDIE(InfoExtractor):
|
||||
embed_code = ds(embed_code)
|
||||
video_type = video_data.get('video_type')
|
||||
if video_type in ('video/rcs', 'video/kenc'):
|
||||
embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
|
||||
if video_type == 'video/kenc':
|
||||
kenc = self._download_json(
|
||||
'https://www.vvvvid.it/kenc', video_id, query={
|
||||
@@ -133,26 +171,75 @@ class VVVVIDIE(InfoExtractor):
|
||||
kenc_message = kenc.get('message')
|
||||
if kenc_message:
|
||||
embed_code += '?' + ds(kenc_message)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
embed_code, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_akamai_formats(embed_code, video_id))
|
||||
else:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
||||
metadata_from_url(embed_code)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
metadata_from_url(video_data.get('thumbnail'))
|
||||
info.update(self._extract_common_video_info(video_data))
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
'series': video_data.get('show_title'),
|
||||
'season_id': season_id,
|
||||
'season_number': video_data.get('season_number'),
|
||||
'episode_id': str_or_none(video_data.get('id')),
|
||||
'episode_number': int_or_none(video_data.get('number')),
|
||||
'episode_title': video_data['title'],
|
||||
'episode': title,
|
||||
'view_count': int_or_none(video_data.get('views')),
|
||||
'like_count': int_or_none(video_data.get('video_likes')),
|
||||
}
|
||||
'repost_count': int_or_none(video_data.get('video_shares')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class VVVVIDShowIE(VVVVIDIE):
|
||||
_VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vvvvid.it/show/156/psyco-pass',
|
||||
'info_dict': {
|
||||
'id': '156',
|
||||
'title': 'Psycho-Pass',
|
||||
'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
|
||||
},
|
||||
'playlist_count': 46,
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/156',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
seasons = self._download_info(
|
||||
show_id, 'seasons/', show_title)
|
||||
|
||||
show_info = self._download_info(
|
||||
show_id, 'info/', show_title, fatal=False)
|
||||
|
||||
entries = []
|
||||
for season in (seasons or []):
|
||||
episodes = season.get('episodes') or []
|
||||
for episode in episodes:
|
||||
if episode.get('playable') is False:
|
||||
continue
|
||||
season_id = str_or_none(episode.get('season_id'))
|
||||
video_id = str_or_none(episode.get('video_id'))
|
||||
if not (season_id and video_id):
|
||||
continue
|
||||
info = self._extract_common_video_info(episode)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'ie_key': VVVVIDIE.ie_key(),
|
||||
'url': '/'.join([base_url, season_id, video_id]),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'season_id': season_id,
|
||||
})
|
||||
entries.append(info)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_id, show_info.get('title'), show_info.get('description'))
|
||||
|
@@ -4,17 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
IE_NAME = 'washingtonpost'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
|
||||
'info_dict': {
|
||||
@@ -23,10 +19,15 @@ class WashingtonPostIE(InfoExtractor):
|
||||
'title': 'Egypt finds belongings, debris from plane crash',
|
||||
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
|
||||
'upload_date': '20160520',
|
||||
'uploader': 'Reuters',
|
||||
'timestamp': 1463778452,
|
||||
'timestamp': 1463775187,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.washingtonpost.com/video/world/egypt-finds-belongings-debris-from-plane-crash/2016/05/20/480ba4ee-1ec7-11e6-82c2-a7dcb313287d_video.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.washingtonpost.com/posttv/world/iraq-to-track-down-antiquities-after-islamic-state-museum-rampage/2015/02/28/7c57e916-bf86-11e4-9dfb-03366e719af8_video.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
@@ -35,73 +36,8 @@ class WashingtonPostIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
|
||||
video_id, transform_source=strip_jsonp)[0]['contentConfig']
|
||||
title = video_data['title']
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for s in video_data.get('streams', []):
|
||||
s_url = s.get('url')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
video_type = s.get('type')
|
||||
if video_type == 'smil':
|
||||
continue
|
||||
elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
width = m3u8_format.get('width')
|
||||
if not width:
|
||||
continue
|
||||
vbr = self._search_regex(
|
||||
r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
|
||||
if vbr:
|
||||
m3u8_format.update({
|
||||
'vbr': int_or_none(vbr),
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
width = int_or_none(s.get('width'))
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
has_width = width != 0
|
||||
formats.append({
|
||||
'format_id': (
|
||||
'%s-%d-%d' % (video_type, width, vbr)
|
||||
if width
|
||||
else video_type),
|
||||
'vbr': vbr if has_width else None,
|
||||
'width': width,
|
||||
'height': int_or_none(s.get('height')),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if has_width else 'none',
|
||||
'filesize': int_or_none(s.get('fileSize')),
|
||||
'url': s_url,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
|
||||
})
|
||||
source_media_url = video_data.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('blurb'),
|
||||
'uploader': video_data.get('credits', {}).get('source'),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('videoDuration'), 100),
|
||||
'timestamp': int_or_none(
|
||||
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
|
||||
}
|
||||
return self.url_result(
|
||||
'arcpublishing:wapo:' + video_id, 'ArcPublishing', video_id)
|
||||
|
||||
|
||||
class WashingtonPostArticleIE(InfoExtractor):
|
||||
@@ -121,9 +57,8 @@ class WashingtonPostArticleIE(InfoExtractor):
|
||||
'title': 'Breaking Points: The Paper Mine',
|
||||
'duration': 1290,
|
||||
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1395527908,
|
||||
'upload_date': '20140322',
|
||||
'timestamp': 1395440416,
|
||||
'upload_date': '20140321',
|
||||
},
|
||||
}, {
|
||||
'md5': '1fff6a689d8770966df78c8cb6c8c17c',
|
||||
@@ -133,9 +68,8 @@ class WashingtonPostArticleIE(InfoExtractor):
|
||||
'title': 'The town bureaucracy sustains',
|
||||
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
||||
'duration': 2220,
|
||||
'timestamp': 1395528005,
|
||||
'upload_date': '20140322',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1395441819,
|
||||
'upload_date': '20140321',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -151,8 +85,7 @@ class WashingtonPostArticleIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
|
||||
'upload_date': '20141230',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1419974765,
|
||||
'timestamp': 1419972442,
|
||||
'title': 'Why black boxes don’t transmit data in real time',
|
||||
}
|
||||
}]
|
||||
|
@@ -5,79 +5,34 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
|
||||
class WistiaBaseIE(InfoExtractor):
|
||||
_VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
|
||||
_VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
|
||||
_EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'md5': 'cafeb56ec0c53c18c97405eecb3133df',
|
||||
'info_dict': {
|
||||
'id': 'sh7fpupwlt',
|
||||
'ext': 'mov',
|
||||
'title': 'Being Resourceful',
|
||||
'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
|
||||
'upload_date': '20131204',
|
||||
'timestamp': 1386185018,
|
||||
'duration': 117,
|
||||
},
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# https://wistia.com/support/embed-and-share/video-on-your-website
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = WistiaIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
for match in re.finditer(
|
||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||
urls.append(unescapeHTML(match.group('url')))
|
||||
for match in re.finditer(
|
||||
r'''(?sx)
|
||||
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
|
||||
''', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data_json = self._download_json(
|
||||
self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
|
||||
# Some videos require this.
|
||||
headers={
|
||||
'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
|
||||
def _download_embed_config(self, config_type, config_id, referer):
|
||||
base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
|
||||
embed_config = self._download_json(
|
||||
base_url + '.json', config_id, headers={
|
||||
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
|
||||
})
|
||||
|
||||
if data_json.get('error'):
|
||||
if isinstance(embed_config, dict) and embed_config.get('error'):
|
||||
raise ExtractorError(
|
||||
'Error while getting the playlist', expected=True)
|
||||
|
||||
data = data_json['media']
|
||||
return embed_config
|
||||
|
||||
def _extract_media(self, embed_config):
|
||||
data = embed_config['media']
|
||||
video_id = data['hashedId']
|
||||
title = data['name']
|
||||
|
||||
formats = []
|
||||
@@ -160,3 +115,85 @@ class WistiaIE(InfoExtractor):
|
||||
'timestamp': int_or_none(data.get('createdAt')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class WistiaIE(WistiaBaseIE):
|
||||
_VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
|
||||
|
||||
_TESTS = [{
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
|
||||
'info_dict': {
|
||||
'id': '807fafadvk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drip Brennan Dunn Workshop',
|
||||
'description': 'a JV Webinars video',
|
||||
'upload_date': '20160518',
|
||||
'timestamp': 1463607249,
|
||||
'duration': 4987.11,
|
||||
},
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# https://wistia.com/support/embed-and-share/video-on-your-website
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = WistiaIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
for match in re.finditer(
|
||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||
urls.append(unescapeHTML(match.group('url')))
|
||||
for match in re.finditer(
|
||||
r'''(?sx)
|
||||
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
|
||||
''', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
embed_config = self._download_embed_config('media', video_id, url)
|
||||
return self._extract_media(embed_config)
|
||||
|
||||
|
||||
class WistiaPlaylistIE(WistiaBaseIE):
|
||||
_VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
|
||||
'info_dict': {
|
||||
'id': 'aodt9etokc',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
playlist = self._download_embed_config('playlist', playlist_id, url)
|
||||
|
||||
entries = []
|
||||
for media in (try_get(playlist, lambda x: x[0]['medias']) or []):
|
||||
embed_config = media.get('embed_config')
|
||||
if not embed_config:
|
||||
continue
|
||||
entries.append(self._extract_media(embed_config))
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -45,6 +45,7 @@ def aa_decode(aa_code):
|
||||
|
||||
class XFileShareIE(InfoExtractor):
|
||||
_SITES = (
|
||||
(r'aparat\.cam', 'Aparat'),
|
||||
(r'clipwatching\.com', 'ClipWatching'),
|
||||
(r'gounlimited\.to', 'GoUnlimited'),
|
||||
(r'govid\.me', 'GoVid'),
|
||||
@@ -78,6 +79,9 @@ class XFileShareIE(InfoExtractor):
|
||||
'title': 'sample',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aparat.cam/n4d6dh0wvlpr',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -1,23 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class YandexDiskIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
yadi\.sk|
|
||||
disk\.yandex\.
|
||||
(?:
|
||||
az|
|
||||
by|
|
||||
co(?:m(?:\.(?:am|ge|tr))?|\.il)|
|
||||
ee|
|
||||
fr|
|
||||
k[gz]|
|
||||
l[tv]|
|
||||
md|
|
||||
t[jm]|
|
||||
u[az]|
|
||||
ru
|
||||
)
|
||||
)/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
|
||||
'md5': '33955d7ae052f15853dc41f35f17581c',
|
||||
'md5': 'a4a8d52958c8fddcf9845935070402ae',
|
||||
'info_dict': {
|
||||
'id': 'VdOeDou8eZs6Y',
|
||||
'ext': 'mp4',
|
||||
@@ -27,92 +47,101 @@ class YandexDiskIE(InfoExtractor):
|
||||
'uploader_id': '300043621',
|
||||
'view_count': int,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
status = self._download_webpage(
|
||||
'https://disk.yandex.com/auth/status', video_id, query={
|
||||
'urlOrigin': url,
|
||||
'source': 'public',
|
||||
'md5': 'false',
|
||||
})
|
||||
|
||||
sk = self._search_regex(
|
||||
r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
|
||||
status, 'sk', group='value')
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
store = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
|
||||
webpage, 'store'), video_id)
|
||||
resource = store['resources'][store['rootResourceId']]
|
||||
|
||||
models = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
|
||||
webpage, 'video JSON'),
|
||||
video_id)
|
||||
title = resource['name']
|
||||
meta = resource.get('meta') or {}
|
||||
|
||||
data = next(
|
||||
model['data'] for model in models
|
||||
if model.get('model') == 'resource')
|
||||
public_url = meta.get('short_url')
|
||||
if public_url:
|
||||
video_id = self._match_id(public_url)
|
||||
|
||||
video_hash = data['id']
|
||||
title = data['name']
|
||||
source_url = (self._download_json(
|
||||
'https://cloud-api.yandex.net/v1/disk/public/resources/download',
|
||||
video_id, query={'public_key': url}, fatal=False) or {}).get('href')
|
||||
video_streams = resource.get('videoStreams') or {}
|
||||
video_hash = resource.get('hash') or url
|
||||
environment = store.get('environment') or {}
|
||||
sk = environment.get('sk')
|
||||
yandexuid = environment.get('yandexuid')
|
||||
if sk and yandexuid and not (source_url and video_streams):
|
||||
self._set_cookie(domain, 'yandexuid', yandexuid)
|
||||
|
||||
models = self._download_json(
|
||||
'https://disk.yandex.com/models/', video_id,
|
||||
data=urlencode_postdata({
|
||||
'_model.0': 'videoInfo',
|
||||
'id.0': video_hash,
|
||||
'_model.1': 'do-get-resource-url',
|
||||
'id.1': video_hash,
|
||||
'version': '13.6',
|
||||
'sk': sk,
|
||||
}), query={'_m': 'videoInfo'})['models']
|
||||
|
||||
videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
|
||||
source_url = try_get(
|
||||
models, lambda x: x[1]['data']['file'], compat_str)
|
||||
def call_api(action):
|
||||
return (self._download_json(
|
||||
urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
|
||||
'hash': video_hash,
|
||||
'sk': sk,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'text/plain',
|
||||
}, fatal=False) or {}).get('data') or {}
|
||||
if not source_url:
|
||||
# TODO: figure out how to detect if download limit has
|
||||
# been reached and then avoid unnecessary source format
|
||||
# extraction requests
|
||||
source_url = call_api('download-url').get('url')
|
||||
if not video_streams:
|
||||
video_streams = call_api('get-video-streams')
|
||||
|
||||
formats = []
|
||||
if source_url:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': 'source',
|
||||
'ext': determine_ext(title, 'mp4'),
|
||||
'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
|
||||
'quality': 1,
|
||||
'filesize': int_or_none(meta.get('size'))
|
||||
})
|
||||
for video in videos:
|
||||
|
||||
for video in (video_streams.get('videos') or []):
|
||||
format_url = video.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
if video.get('dimension') == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
format_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
size = video.get('size') or {}
|
||||
height = int_or_none(size.get('height'))
|
||||
format_id = 'hls'
|
||||
if height:
|
||||
format_id += '-%dp' % height
|
||||
formats.append({
|
||||
'ext': 'mp4',
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'protocol': 'm3u8_native',
|
||||
'url': format_url,
|
||||
'width': int_or_none(size.get('width')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = float_or_none(try_get(
|
||||
models, lambda x: x[0]['data']['duration']), 1000)
|
||||
uploader = try_get(
|
||||
data, lambda x: x['user']['display_name'], compat_str)
|
||||
uploader_id = try_get(
|
||||
data, lambda x: x['user']['uid'], compat_str)
|
||||
view_count = int_or_none(try_get(
|
||||
data, lambda x: x['meta']['views_counter']))
|
||||
uid = resource.get('uid')
|
||||
display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'duration': float_or_none(video_streams.get('duration'), 1000),
|
||||
'uploader': display_name,
|
||||
'uploader_id': uid,
|
||||
'view_count': int_or_none(meta.get('views_counter')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -13,26 +14,30 @@ class YandexVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
|
||||
yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=|
|
||||
frontend\.vh\.yandex\.ru/player/
|
||||
)
|
||||
(?P<id>[\da-f]+)
|
||||
(?P<id>(?:[\da-f]{32}|[\w-]{12}))
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
'md5': '33955d7ae052f15853dc41f35f17581c',
|
||||
'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
|
||||
'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4',
|
||||
'info_dict': {
|
||||
'id': '4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 0,
|
||||
'duration': 30,
|
||||
'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь',
|
||||
'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1549972939,
|
||||
'duration': 5575,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20190212',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
|
||||
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
@@ -52,53 +57,88 @@ class YandexVideoIE(InfoExtractor):
|
||||
# DASH with DRM
|
||||
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
content = self._download_json(
|
||||
'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
|
||||
video_id, query={
|
||||
'stream_options': 'hires',
|
||||
'disable_trackings': 1,
|
||||
})['content']
|
||||
player = try_get((self._download_json(
|
||||
'https://frontend.vh.yandex.ru/graphql', video_id, data=('''{
|
||||
player(content_id: "%s") {
|
||||
computed_title
|
||||
content_url
|
||||
description
|
||||
dislikes
|
||||
duration
|
||||
likes
|
||||
program_title
|
||||
release_date
|
||||
release_date_ut
|
||||
release_year
|
||||
restriction_age
|
||||
season
|
||||
start_time
|
||||
streams
|
||||
thumbnail
|
||||
title
|
||||
views_count
|
||||
}
|
||||
}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content'])
|
||||
if not player or player.get('error'):
|
||||
player = self._download_json(
|
||||
'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
|
||||
video_id, query={
|
||||
'stream_options': 'hires',
|
||||
'disable_trackings': 1,
|
||||
})
|
||||
content = player['content']
|
||||
|
||||
content_url = url_or_none(content.get('content_url')) or url_or_none(
|
||||
content['streams'][0]['url'])
|
||||
title = content.get('title') or content.get('computed_title')
|
||||
title = content.get('title') or content['computed_title']
|
||||
|
||||
ext = determine_ext(content_url)
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
content_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
elif ext == 'mpd':
|
||||
formats = self._extract_mpd_formats(
|
||||
content_url, video_id, mpd_id='dash')
|
||||
else:
|
||||
formats = [{'url': content_url}]
|
||||
formats = []
|
||||
streams = content.get('streams') or []
|
||||
streams.append({'url': content.get('content_url')})
|
||||
for stream in streams:
|
||||
content_url = url_or_none(stream.get('url'))
|
||||
if not content_url:
|
||||
continue
|
||||
ext = determine_ext(content_url)
|
||||
if ext == 'ismc':
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
content_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
content_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({'url': content_url})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = content.get('description')
|
||||
thumbnail = content.get('thumbnail')
|
||||
timestamp = (int_or_none(content.get('release_date'))
|
||||
or int_or_none(content.get('release_date_ut'))
|
||||
or int_or_none(content.get('start_time')))
|
||||
duration = int_or_none(content.get('duration'))
|
||||
series = content.get('program_title')
|
||||
age_limit = int_or_none(content.get('restriction_age'))
|
||||
season = content.get('season') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'description': content.get('description'),
|
||||
'thumbnail': content.get('thumbnail'),
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'series': series,
|
||||
'age_limit': age_limit,
|
||||
'duration': int_or_none(content.get('duration')),
|
||||
'series': content.get('program_title'),
|
||||
'age_limit': int_or_none(content.get('restriction_age')),
|
||||
'view_count': int_or_none(content.get('views_count')),
|
||||
'like_count': int_or_none(content.get('likes')),
|
||||
'dislike_count': int_or_none(content.get('dislikes')),
|
||||
'season_number': int_or_none(season.get('season_number')),
|
||||
'season_id': season.get('id'),
|
||||
'release_year': int_or_none(content.get('release_year')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -60,6 +60,9 @@ class YouPornIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.youporn.com/watch/505835',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -88,7 +91,7 @@ class YouPornIE(InfoExtractor):
|
||||
# Main source
|
||||
definitions = self._parse_json(
|
||||
self._search_regex(
|
||||
r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
|
||||
r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
|
||||
'media definitions', default='[]'),
|
||||
video_id, fatal=False)
|
||||
if definitions:
|
||||
@@ -100,7 +103,7 @@ class YouPornIE(InfoExtractor):
|
||||
links.append(video_url)
|
||||
|
||||
# Fallback #1, this also contains extra low quality 180p format
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
||||
links.append(link)
|
||||
|
||||
# Fallback #2 (unavailable as at 22.06.2017)
|
||||
@@ -128,8 +131,9 @@ class YouPornIE(InfoExtractor):
|
||||
# Video URL's path looks like this:
|
||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||
# We will benefit from it by extracting some metadata
|
||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
|
||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
if mobj:
|
||||
height = int(mobj.group('height'))
|
||||
bitrate = int(mobj.group('bitrate'))
|
||||
|
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
|
||||
from ..swfinterp import SWFInterpreter
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -279,6 +280,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
|
||||
def _call_api(self, ep, query, video_id):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
@@ -296,7 +298,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_yt_initial_data(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
|
||||
(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
|
||||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||
video_id)
|
||||
|
||||
@@ -306,6 +308,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), video_id, fatal=False)
|
||||
|
||||
def _extract_video(self, renderer):
|
||||
video_id = renderer['videoId']
|
||||
title = try_get(
|
||||
renderer,
|
||||
(lambda x: x['title']['runs'][0]['text'],
|
||||
lambda x: x['title']['simpleText']), compat_str)
|
||||
description = try_get(
|
||||
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
||||
compat_str)
|
||||
duration = parse_duration(try_get(
|
||||
renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||
view_count_text = try_get(
|
||||
renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(
|
||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
@@ -321,7 +353,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
||||
(?:(?:www|dev)\.)?invidio\.us/|
|
||||
(?:(?:www|no)\.)?invidiou\.sh/|
|
||||
(?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
|
||||
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
|
||||
(?:www\.)?invidious\.kabi\.tk/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.mastodon\.host/|
|
||||
@@ -1102,6 +1134,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# another example of '};' in ytInitialData
|
||||
'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -1322,17 +1363,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return self._parse_json(
|
||||
uppercase_escape(config), video_id, fatal=False)
|
||||
|
||||
def _get_automatic_captions(self, video_id, webpage):
|
||||
def _get_automatic_captions(self, video_id, player_response, player_config):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
player_config = self._get_ytplayer_config(video_id, webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
if not player_config:
|
||||
if not (player_response or player_config):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
try:
|
||||
args = player_config['args']
|
||||
args = player_config.get('args') if player_config else {}
|
||||
caption_url = args.get('ttsurl')
|
||||
if caption_url:
|
||||
timestamp = args['timestamp']
|
||||
@@ -1391,19 +1431,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return captions
|
||||
|
||||
# New captions format as of 22.06.2017
|
||||
player_response = args.get('player_response')
|
||||
if player_response and isinstance(player_response, compat_str):
|
||||
player_response = self._parse_json(
|
||||
player_response, video_id, fatal=False)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
@@ -1652,6 +1688,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
ytplayer_config = None
|
||||
|
||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||
age_gate = True
|
||||
@@ -1705,7 +1742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not video_info and not player_response:
|
||||
player_response = extract_player_response(
|
||||
self._search_regex(
|
||||
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||
(r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
|
||||
'initial player response', default='{}'),
|
||||
video_id)
|
||||
@@ -2276,7 +2313,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
|
||||
|
||||
video_duration = try_get(
|
||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||
@@ -2446,7 +2483,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
)/
|
||||
(?:
|
||||
(?:channel|c|user|feed)/|
|
||||
(?:playlist|watch)\?.*?\blist=
|
||||
(?:playlist|watch)\?.*?\blist=|
|
||||
(?!(?:watch|embed|v|e)\b)
|
||||
)
|
||||
(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
@@ -2715,13 +2753,27 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
# inline playlist with not always working continuations
|
||||
'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
|
||||
'only_matching': True,
|
||||
}
|
||||
# TODO
|
||||
# {
|
||||
# 'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
# 'only_matching': True,
|
||||
# }
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/course',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/zsecurity',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.youtube.com/NASAgovVideo/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if YoutubeIE.suitable(url) else super(
|
||||
YoutubeTabIE, cls).suitable(url)
|
||||
|
||||
def _extract_channel_id(self, webpage):
|
||||
channel_id = self._html_search_meta(
|
||||
@@ -2743,36 +2795,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
if renderer:
|
||||
return renderer
|
||||
|
||||
def _extract_video(self, renderer):
|
||||
video_id = renderer.get('videoId')
|
||||
title = try_get(
|
||||
renderer,
|
||||
(lambda x: x['title']['runs'][0]['text'],
|
||||
lambda x: x['title']['simpleText']), compat_str)
|
||||
description = try_get(
|
||||
renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
||||
compat_str)
|
||||
duration = parse_duration(try_get(
|
||||
renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||
view_count_text = try_get(
|
||||
renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(
|
||||
renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': uploader,
|
||||
}
|
||||
|
||||
def _grid_entries(self, grid_renderer):
|
||||
for item in grid_renderer['items']:
|
||||
if not isinstance(item, dict):
|
||||
@@ -3003,10 +3025,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
for page_num in itertools.count(1):
|
||||
if not continuation:
|
||||
break
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/browse_ajax', None,
|
||||
'Downloading page %d' % page_num,
|
||||
headers=headers, query=continuation, fatal=False)
|
||||
count = 0
|
||||
retries = 3
|
||||
while count <= retries:
|
||||
try:
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# that is usually worked around with a retry
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/browse_ajax', None,
|
||||
'Downloading page %d%s'
|
||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||
headers=headers, query=continuation)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||
count += 1
|
||||
if count <= retries:
|
||||
continue
|
||||
raise
|
||||
if not browse:
|
||||
break
|
||||
response = try_get(browse, lambda x: x[1]['response'], dict)
|
||||
@@ -3381,46 +3417,29 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
|
||||
list)
|
||||
if not slr_contents:
|
||||
break
|
||||
isr_contents = try_get(
|
||||
slr_contents,
|
||||
lambda x: x[0]['itemSectionRenderer']['contents'],
|
||||
list)
|
||||
if not isr_contents:
|
||||
break
|
||||
for content in isr_contents:
|
||||
if not isinstance(content, dict):
|
||||
for slr_content in slr_contents:
|
||||
isr_contents = try_get(
|
||||
slr_content,
|
||||
lambda x: x['itemSectionRenderer']['contents'],
|
||||
list)
|
||||
if not isr_contents:
|
||||
continue
|
||||
video = content.get('videoRenderer')
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_id = video.get('videoId')
|
||||
if not video_id:
|
||||
continue
|
||||
title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
|
||||
description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
|
||||
duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
|
||||
view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'^(\d+)', re.sub(r'\s', '', view_count_text),
|
||||
'view count', default=None))
|
||||
uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
||||
total += 1
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'uploader': uploader,
|
||||
}
|
||||
if total == n:
|
||||
return
|
||||
for content in isr_contents:
|
||||
if not isinstance(content, dict):
|
||||
continue
|
||||
video = content.get('videoRenderer')
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_id = video.get('videoId')
|
||||
if not video_id:
|
||||
continue
|
||||
yield self._extract_video(video)
|
||||
total += 1
|
||||
if total == n:
|
||||
return
|
||||
token = try_get(
|
||||
slr_contents,
|
||||
lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||
lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
||||
compat_str)
|
||||
if not token:
|
||||
break
|
||||
|
@@ -85,7 +85,13 @@ class ZypeIE(InfoExtractor):
|
||||
else:
|
||||
m3u8_url = self._search_regex(
|
||||
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
|
||||
body, 'm3u8 url', group='url')
|
||||
body, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
source = self._parse_json(self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
|
||||
'source'), video_id, js_to_json)
|
||||
if source.get('integration') == 'verizon-media':
|
||||
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
text_tracks = self._search_regex(
|
||||
|
@@ -3640,7 +3640,7 @@ def url_or_none(url):
|
||||
if not url or not isinstance(url, compat_str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
|
||||
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
@@ -5706,3 +5706,20 @@ def random_birthday(year_field, month_field, day_field):
|
||||
month_field: str(random_date.month),
|
||||
day_field: str(random_date.day),
|
||||
}
|
||||
|
||||
|
||||
def clean_podcast_url(url):
|
||||
return re.sub(r'''(?x)
|
||||
(?:
|
||||
(?:
|
||||
chtbl\.com/track|
|
||||
media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
|
||||
play\.podtrac\.com
|
||||
)/[^/]+|
|
||||
(?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
|
||||
flex\.acast\.com|
|
||||
pd(?:
|
||||
cn\.co| # https://podcorn.com/analytics-prefix/
|
||||
st\.fm # https://podsights.com/docs/
|
||||
)/e
|
||||
)/''', '', url)
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2020.12.22'
|
||||
__version__ = '2021.01.16'
|
||||
|
Reference in New Issue
Block a user