mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 22:28:37 +09:00
Compare commits
108 Commits
76fe4ba3b2
...
2021.01.03
Author | SHA1 | Date | |
---|---|---|---|
![]() |
8e953dcbb1 | ||
![]() |
f4afb9a6a8 | ||
![]() |
d5b8cf093c | ||
![]() |
5c6e84c0ff | ||
![]() |
1aaee908b9 | ||
![]() |
b2d9fd9c9f | ||
![]() |
bc2f83b95e | ||
![]() |
85de33b04e | ||
![]() |
7dfd966848 | ||
![]() |
a25d03d7cb | ||
![]() |
cabfd4b1f0 | ||
![]() |
7b643d4cd0 | ||
![]() |
1f1d01d498 | ||
![]() |
21a42e2588 | ||
![]() |
2df93a0c4a | ||
![]() |
75972e200d | ||
![]() |
d0d838638c | ||
![]() |
8c17afc471 | ||
![]() |
40d66e07df | ||
![]() |
ab89a8678b | ||
![]() |
4d7d056909 | ||
![]() |
c35bc82606 | ||
![]() |
2f56caf083 | ||
![]() |
4066945919 | ||
![]() |
2a84694b1e | ||
![]() |
4046ffe1e1 | ||
![]() |
d1d0612160 | ||
![]() |
7b0f04ed1f | ||
![]() |
2e21b06ea2 | ||
![]() |
a6f75e6e89 | ||
![]() |
bd18824c2a | ||
![]() |
bdd044e67b | ||
![]() |
f7e95fb2a0 | ||
![]() |
9dd674e1d2 | ||
![]() |
9c1e164e0c | ||
![]() |
c706fbe9fe | ||
![]() |
ebdcf70b0d | ||
![]() |
5966095e65 | ||
![]() |
9ee984fc76 | ||
![]() |
53528e1d23 | ||
![]() |
c931c4b8dd | ||
![]() |
7acd042bbb | ||
![]() |
bcfe485e01 | ||
![]() |
479cc6d5a1 | ||
![]() |
38286ee729 | ||
![]() |
1a95953867 | ||
![]() |
71febd1c52 | ||
![]() |
f1bc56c99b | ||
![]() |
64e419bd73 | ||
![]() |
782ea947b4 | ||
![]() |
f27224d57b | ||
![]() |
c007188598 | ||
![]() |
af93ecfd88 | ||
![]() |
794771a164 | ||
![]() |
6f2eaaf73d | ||
![]() |
4c7a4dbc4d | ||
![]() |
f86b299d0e | ||
![]() |
e474996541 | ||
![]() |
aed617e311 | ||
![]() |
0fa67c1d68 | ||
![]() |
365b3cc72d | ||
![]() |
a272fe21a8 | ||
![]() |
cec1c2f211 | ||
![]() |
12053450dc | ||
![]() |
46cffb0c47 | ||
![]() |
c32a059f52 | ||
![]() |
6911312e53 | ||
![]() |
f22b5a6b96 | ||
![]() |
58e55198c1 | ||
![]() |
d61ed9f2f1 | ||
![]() |
8bc4c6350e | ||
![]() |
cfa4ffa23b | ||
![]() |
4f1dc1463d | ||
![]() |
17e0f41d34 | ||
![]() |
b57b27ff8f | ||
![]() |
bbe8cc6662 | ||
![]() |
98106accb6 | ||
![]() |
af1312bfc3 | ||
![]() |
4c7d7215cd | ||
![]() |
0370d9eb3d | ||
![]() |
1434651d20 | ||
![]() |
2c312ab84a | ||
![]() |
0ee78d62d5 | ||
![]() |
7f3c90ab25 | ||
![]() |
1d3cd29730 | ||
![]() |
4ef1fc9707 | ||
![]() |
f9e6aa1dcf | ||
![]() |
f83db9064b | ||
![]() |
2da9a86399 | ||
![]() |
ecaa535cf4 | ||
![]() |
79dd92b1fe | ||
![]() |
bd3844c9c2 | ||
![]() |
7bf5e3a84a | ||
![]() |
08a17dae5b | ||
![]() |
924ea66ade | ||
![]() |
5b72f5b74f | ||
![]() |
bfa345744d | ||
![]() |
f966461476 | ||
![]() |
b8aea53682 | ||
![]() |
c0d9eb7043 | ||
![]() |
3ba6aabd25 | ||
![]() |
a8b31505ed | ||
![]() |
90a271e914 | ||
![]() |
172d270607 | ||
![]() |
22feed08a1 | ||
![]() |
942b8ca3be | ||
![]() |
3729c52f9d | ||
![]() |
71679eaee8 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.12.14
|
||||
[debug] youtube-dl version 2021.01.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.12.14
|
||||
[debug] youtube-dl version 2021.01.03
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.03. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.03**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
50
.github/workflows/ci.yml
vendored
Normal file
50
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: CI
|
||||
on: [push]
|
||||
jobs:
|
||||
tests:
|
||||
name: Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
# TODO: python 2.6
|
||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
||||
ytdl-test-set: [core, download]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# python 3.2 is only available on windows via setup-python
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: bat
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Run tests
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' }}
|
||||
env:
|
||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
flake8:
|
||||
name: Linter
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install flake8
|
||||
run: pip install flake8
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
127
ChangeLog
127
ChangeLog
@@ -1,3 +1,130 @@
|
||||
version 2021.01.03
|
||||
|
||||
Extractors
|
||||
* [nrk] Improve series metadata extraction (#27473)
|
||||
+ [nrk] Extract subtitles
|
||||
* [nrk] Fix age limit extraction
|
||||
* [nrk] Improve video id extraction
|
||||
+ [nrk] Add support for podcasts (#27634, #27635)
|
||||
* [nrk] Generalize and delegate all item extractors to nrk
|
||||
+ [nrk] Add support for mp3 formats
|
||||
* [nrktv] Switch to playback endpoint
|
||||
* [vvvvid] Fix season metadata extraction (#18130)
|
||||
* [stitcher] Fix extraction (#20811, #27606)
|
||||
* [acast] Fix extraction (#21444, #27612, #27613)
|
||||
+ [arcpublishing] Add support for arcpublishing.com (#2298, #9340, #17200)
|
||||
+ [sky] Add support for Sports News articles and Brightcove videos (#13054)
|
||||
+ [vvvvid] Extract akamai formats
|
||||
* [vvvvid] Skip unplayable episodes (#27599)
|
||||
* [yandexvideo] Fix extraction for Python 3.4
|
||||
|
||||
|
||||
version 2020.12.31
|
||||
|
||||
Core
|
||||
* [utils] Accept only supported protocols in url_or_none
|
||||
* [YoutubeDL] Allow format filtering using audio language (#16209)
|
||||
|
||||
Extractors
|
||||
+ [redditr] Extract all thumbnails (#27503)
|
||||
* [vvvvid] Improve info extraction
|
||||
+ [vvvvid] Add support for playlists (#18130, #27574)
|
||||
+ [yandexdisk] Extract info from webpage
|
||||
* [yandexdisk] Fix extraction (#17861, #27131)
|
||||
* [yandexvideo] Use old API call as fallback
|
||||
* [yandexvideo] Fix extraction (#25000)
|
||||
- [nbc] Remove CSNNE extractor
|
||||
* [nbc] Fix NBCSport VPlayer URL extraction (#16640)
|
||||
+ [aenetworks] Add support for biography.com (#3863)
|
||||
* [uktvplay] Match new video URLs (#17909)
|
||||
* [sevenplay] Detect API errors
|
||||
* [tenplay] Fix format extraction (#26653)
|
||||
* [brightcove] Raise error for DRM protected videos (#23467, #27568)
|
||||
|
||||
|
||||
version 2020.12.29
|
||||
|
||||
Extractors
|
||||
* [youtube] Improve yt initial data extraction (#27524)
|
||||
* [youtube:tab] Improve URL matching (#27559)
|
||||
* [youtube:tab] Restore retry on browse requests (#27313, #27564)
|
||||
* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904,
|
||||
#25418, #26070, #26350, #26738, #27563)
|
||||
- [brightcove] Remove sonyliv specific code
|
||||
* [piksel] Improve format extraction
|
||||
+ [zype] Add support for uplynk videos
|
||||
+ [toggle] Add support for live.mewatch.sg (#27555)
|
||||
+ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826)
|
||||
* [teachable] Improve embed detection (#26923)
|
||||
* [mitele] Fix free video extraction (#24624, #25827, #26757)
|
||||
* [telecinco] Fix extraction
|
||||
* [youtube] Update invidious.snopyta.org (#22667)
|
||||
* [amcnetworks] Improve auth only video detection (#27548)
|
||||
+ [generic] Add support for VHX Embeds (#27546)
|
||||
|
||||
|
||||
version 2020.12.26
|
||||
|
||||
Extractors
|
||||
* [instagram] Fix comment count extraction
|
||||
+ [instagram] Add support for reel URLs (#26234, #26250)
|
||||
* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821,
|
||||
#27538)
|
||||
* [instagram] Improve thumbnail extraction
|
||||
* [instagram] Fix extraction when authenticated (#22880, #26377, #26981,
|
||||
#27422)
|
||||
* [spankbang:playlist] Fix extraction (#24087)
|
||||
+ [spankbang] Add support for playlist videos
|
||||
* [pornhub] Improve like and dislike count extraction (#27356)
|
||||
* [pornhub] Fix lq formats extraction (#27386, #27393)
|
||||
+ [bongacams] Add support for bongacams.com (#27440)
|
||||
* [youtube:tab] Extend URL regular expression (#27501)
|
||||
* [theweatherchannel] Fix extraction (#25930, #26051)
|
||||
+ [sprout] Add support for Universal Kids (#22518)
|
||||
* [theplatform] Allow passing geo bypass countries from other extractors
|
||||
+ [wistia] Add support for playlists (#27533)
|
||||
+ [ctv] Add support for ctv.ca (#27525)
|
||||
* [9c9media] Improve info extraction
|
||||
* [youtube] Fix automatic captions extraction (#27162, #27388)
|
||||
* [sonyliv] Fix title for movies
|
||||
* [sonyliv] Fix extraction (#25667)
|
||||
* [streetvoice] Fix extraction (#27455, #27492)
|
||||
+ [facebook] Add support for watchparty pages (#27507)
|
||||
* [cbslocal] Fix video extraction
|
||||
+ [brightcove] Add another method to extract policyKey
|
||||
* [mewatch] Relax URL regular expression (#27506)
|
||||
|
||||
|
||||
version 2020.12.22
|
||||
|
||||
Core
|
||||
* [common] Remove unwanted query params from unsigned akamai manifest URLs
|
||||
|
||||
Extractors
|
||||
- [tastytrade] Remove extractor (#25716)
|
||||
* [niconico] Fix playlist extraction (#27428)
|
||||
- [everyonesmixtape] Remove extractor
|
||||
- [kanalplay] Remove extractor
|
||||
* [arkena] Fix extraction
|
||||
* [nba] Rewrite extractor
|
||||
* [turner] Improve info extraction
|
||||
* [youtube] Improve xsrf token extraction (#27442)
|
||||
* [generic] Improve RSS age limit extraction
|
||||
* [generic] Fix RSS itunes thumbnail extraction (#27405)
|
||||
+ [redditr] Extract duration (#27426)
|
||||
- [zaq1] Remove extractor
|
||||
+ [asiancrush] Add support for retrocrush.tv
|
||||
* [asiancrush] Fix extraction
|
||||
- [noco] Remove extractor (#10864)
|
||||
* [nfl] Fix extraction (#22245)
|
||||
* [skysports] Relax URL regular expression (#27435)
|
||||
+ [tv5unis] Add support for tv5unis.ca (#22399, #24890)
|
||||
+ [videomore] Add support for more.tv (#27088)
|
||||
+ [yandexmusic] Add support for music.yandex.com (#27425)
|
||||
+ [nhk:program] Add support for audio programs and program clips
|
||||
+ [nhk] Add support for NHK video programs (#27230)
|
||||
|
||||
|
||||
version 2020.12.14
|
||||
|
||||
Core
|
||||
|
@@ -1,4 +1,5 @@
|
||||
[Build Status](https://travis-ci.com/ytdl-org/youtube-dl)
|
||||
[Build Status](https://github.com/ytdl-org/youtube-dl/actions?query=workflow%3ACI)
|
||||
|
||||
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
@@ -677,6 +678,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
- `language`: Language code
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
@@ -879,7 +881,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
In order to extract cookies from a browser, use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
|
17
devscripts/run_tests.bat
Normal file
17
devscripts/run_tests.bat
Normal file
@@ -0,0 +1,17 @@
|
||||
@echo off
|
||||
|
||||
rem Keep this list in sync with the `offlinetest` target in Makefile
|
||||
set DOWNLOAD_TESTS="age_restriction^|download^|iqiyi_sdk_interpreter^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature"
|
||||
|
||||
if "%YTDL_TEST_SET%" == "core" (
|
||||
set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
|
||||
set multiprocess_args=""
|
||||
) else if "%YTDL_TEST_SET%" == "download" (
|
||||
set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
|
||||
set multiprocess_args="--processes=4 --process-timeout=540"
|
||||
) else (
|
||||
echo YTDL_TEST_SET is not set or invalid
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
|
@@ -57,6 +57,7 @@
|
||||
- **appletrailers**
|
||||
- **appletrailers:section**
|
||||
- **archive.org**: archive.org videos
|
||||
- **ArcPublishing**
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
@@ -104,6 +105,7 @@
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BiliBiliPlayer**
|
||||
- **BioBioChileTV**
|
||||
- **Biography**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
@@ -112,6 +114,7 @@
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BongaCams**
|
||||
- **BostonGlobe**
|
||||
- **Box**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
@@ -146,6 +149,7 @@
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSLocalArticle**
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
@@ -195,9 +199,9 @@
|
||||
- **CrooksAndLiars**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **cu.ntv.co.jp**: Nippon Television Network
|
||||
- **Culturebox**
|
||||
@@ -268,7 +272,6 @@
|
||||
- **ESPNArticle**
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
@@ -315,7 +318,6 @@
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameSpot**
|
||||
@@ -348,6 +350,7 @@
|
||||
- **hgtv.com:show**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
@@ -400,7 +403,6 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **KarriereVideos**
|
||||
@@ -541,6 +543,11 @@
|
||||
- **NationalGeographicTV**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **nba:watch**
|
||||
- **nba:watch:collection**
|
||||
- **NBAChannel**
|
||||
- **NBAEmbed**
|
||||
- **NBAWatchEmbed**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **nbcolympics**
|
||||
@@ -570,8 +577,10 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **nfl.com**
|
||||
- **nfl.com** (Currently broken)
|
||||
- **nfl.com:article** (Currently broken)
|
||||
- **NhkVod**
|
||||
- **NhkVodProgram**
|
||||
- **nhl.com**
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
@@ -585,7 +594,6 @@
|
||||
- **njoy:embed**
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **NonkTube**
|
||||
- **Noovo**
|
||||
- **Normalboots**
|
||||
@@ -603,6 +611,7 @@
|
||||
- **Npr**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKRadioPodkast**
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
@@ -806,12 +815,13 @@
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **sky.it**
|
||||
- **sky:news**
|
||||
- **sky:sports**
|
||||
- **sky:sports:news**
|
||||
- **skyacademy.it**
|
||||
- **SkylineWebcams**
|
||||
- **SkyNews**
|
||||
- **skynewsarabia:article**
|
||||
- **skynewsarabia:video**
|
||||
- **SkySports**
|
||||
- **Slideshare**
|
||||
- **SlidesLive**
|
||||
- **Slutload**
|
||||
@@ -872,7 +882,6 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
@@ -946,6 +955,8 @@
|
||||
- **TV2DKBornholmPlay**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **tv5unis**
|
||||
- **tv5unis:video**
|
||||
- **tv8.it**
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
@@ -1081,6 +1092,7 @@
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VVVVIDShow**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Wakanim**
|
||||
@@ -1103,6 +1115,7 @@
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **Wistia**
|
||||
- **WistiaPlaylist**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **WorldStarHipHop**
|
||||
- **WSJ**: Wall Street Journal
|
||||
@@ -1165,7 +1178,6 @@
|
||||
- **YoutubeYtBe**
|
||||
- **YoutubeYtUser**
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **Zattoo**
|
||||
- **ZattooLive**
|
||||
- **ZDF**
|
||||
|
@@ -36,7 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
@@ -57,8 +57,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
# def test_youtube_user_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||
|
@@ -554,6 +554,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
self.assertEqual(url_or_none('s3://foo.de'), None)
|
||||
self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
|
||||
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
||||
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
||||
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
|
@@ -1083,7 +1083,7 @@ class YoutubeDL(object):
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
|
@@ -2,21 +2,47 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class ACastIE(InfoExtractor):
|
||||
class ACastBaseIE(InfoExtractor):
|
||||
def _extract_episode(self, episode, show_info):
|
||||
title = episode['title']
|
||||
info = {
|
||||
'id': episode['id'],
|
||||
'display_id': episode.get('episodeUrl'),
|
||||
'url': episode['url'],
|
||||
'title': title,
|
||||
'description': clean_html(episode.get('description') or episode.get('summary')),
|
||||
'thumbnail': episode.get('image'),
|
||||
'timestamp': parse_iso8601(episode.get('publishDate')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'filesize': int_or_none(episode.get('contentLength')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(episode.get('episode')),
|
||||
}
|
||||
info.update(show_info)
|
||||
return info
|
||||
|
||||
def _extract_show_info(self, show):
|
||||
return {
|
||||
'creator': show.get('author'),
|
||||
'series': show.get('title'),
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id, query=None):
|
||||
return self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
|
||||
|
||||
|
||||
class ACastIE(ACastBaseIE):
|
||||
IE_NAME = 'acast'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -28,15 +54,15 @@ class ACastIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
||||
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
||||
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
|
||||
'timestamp': 1477346700,
|
||||
'upload_date': '20161024',
|
||||
'duration': 2766.602563,
|
||||
'duration': 2766,
|
||||
'creator': 'Anton Berg & Martin Johnson',
|
||||
'series': 'Spår',
|
||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||
@@ -45,7 +71,7 @@ class ACastIE(InfoExtractor):
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
@@ -54,40 +80,14 @@ class ACastIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
||||
display_id)
|
||||
media_url = s['url']
|
||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
||||
episode_url = s.get('episodeUrl')
|
||||
if episode_url:
|
||||
display_id = episode_url
|
||||
else:
|
||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e.get('name') or s['title']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
'season_number': int_or_none(e.get('seasonNumber')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||
}
|
||||
episode = self._call_api(
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
|
||||
|
||||
class ACastChannelIE(InfoExtractor):
|
||||
class ACastChannelIE(ACastBaseIE):
|
||||
IE_NAME = 'acast:channel'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@@ -102,34 +102,24 @@ class ACastChannelIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||
'title': 'Today in Focus',
|
||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
||||
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||
},
|
||||
'playlist_mincount': 35,
|
||||
'playlist_mincount': 200,
|
||||
}, {
|
||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
|
||||
def _fetch_page(self, channel_slug, page):
|
||||
casts = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
|
||||
channel_slug, note='Download page %d of channel data' % page)
|
||||
for cast in casts:
|
||||
yield self.url_result(
|
||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
||||
'ACast', cast['id'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_slug = self._match_id(url)
|
||||
channel_data = self._download_json(
|
||||
self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, channel_slug), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, compat_str(
|
||||
channel_data['id']), channel_data['name'], channel_data.get('description'))
|
||||
show_slug = self._match_id(url)
|
||||
show = self._call_api(show_slug, show_slug)
|
||||
show_info = self._extract_show_info(show)
|
||||
entries = []
|
||||
for episode in (show.get('episodes') or []):
|
||||
entries.append(self._extract_episode(episode, show_info))
|
||||
return self.playlist_result(
|
||||
entries, show.get('id'), show.get('title'), show.get('description'))
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
@@ -28,6 +29,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
'biography.com': (None, 'biography'),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
@@ -54,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e, GeoRestrictedError):
|
||||
raise
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
@@ -67,6 +71,34 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
    """Look up a video in the aetnd feeds API and build its info dict.

    `domain` selects the (requestor id, brand) pair from `_DOMAIN_MAP`.
    The feed is queried with `filter[<filter_key>]=<filter_value>` and the
    first result is used.  `url` is only forwarded to `_extract_mvpd_auth`
    when the ThePlatform metadata flags the video as behind the wall.
    """
    requestor_id, brand = self._DOMAIN_MAP[domain]
    feed_url = 'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand
    feed_query = {'filter[%s]' % filter_key: filter_value}
    feed = self._download_json(feed_url, filter_value, query=feed_query)
    result = feed['results'][0]
    title = result['title']
    video_id = result['id']
    media_url = result['publicUrl']

    theplatform_path = self._search_regex(
        r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path')
    theplatform_metadata = self._download_theplatform_metadata(
        theplatform_path, video_id)
    info = self._parse_theplatform_metadata(theplatform_metadata)

    auth = None
    if theplatform_metadata.get('AETN$isBehindWall'):
        resource_title = theplatform_metadata['title']
        # Prefer the current program id key, fall back to the "_OLD" one.
        program_id = theplatform_metadata.get('AETN$PPL_pplProgramId')
        if not program_id:
            program_id = theplatform_metadata.get('AETN$PPL_pplProgramId_OLD')
        rating = theplatform_metadata['ratings'][0]['rating']
        resource = self._get_mvpd_resource(
            requestor_id, resource_title, program_id, rating)
        auth = self._extract_mvpd_auth(url, video_id, requestor_id, resource)

    info.update(self._extract_aen_smil(media_url, video_id, auth))
    # Feed values are applied last, on top of the ThePlatform/SMIL data.
    feed_fields = {
        'title': title,
        'series': result.get('seriesName'),
        'season_number': int_or_none(result.get('tvSeasonNumber')),
        'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
    }
    info.update(feed_fields)
    return info
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
@@ -139,32 +171,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
canonical, query={'filter[canonical]': '/' + canonical})['results'][0]
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
|
||||
|
||||
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
@@ -294,3 +301,42 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
return self.url_result(
|
||||
'http://www.history.com/videos/' + display_id,
|
||||
AENetworksIE.ie_key())
|
||||
|
||||
|
||||
class HistoryPlayerIE(AENetworksBaseIE):
    # Handles history.com / biography.com "/player/<numeric id>" URLs by
    # looking the video up by id through _extract_aetn_info.
    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        """Resolve a player URL via the feeds API, filtering on the video id."""
        mobj = re.match(self._VALID_URL, url)
        domain = mobj.group('domain')
        video_id = mobj.group('id')
        return self._extract_aetn_info(domain, 'id', video_id, url)
|
||||
|
||||
|
||||
class BiographyIE(AENetworksBaseIE):
    # Handles biography.com "/video/<slug>" pages: finds the embedded
    # <phoenix-iframe> player URL and delegates to HistoryPlayerIE.
    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Download the page and hand its player iframe URL to HistoryPlayerIE."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # The outer group wraps the whole player URL pattern, so the match
        # returned is the full iframe URL.
        iframe_re = r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL
        iframe_url = self._search_regex(iframe_re, page, 'player URL')
        return self.url_result(iframe_url, HistoryPlayerIE.ie_key())
|
||||
|
@@ -80,7 +80,8 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
if properties.get('videoCategory') == 'TVE-Auth':
|
||||
video_category = properties.get('videoCategory')
|
||||
if video_category and video_category.endswith('-Auth'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
|
@@ -9,7 +9,6 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
# from .anvato_token_generator import NFLTokenGenerator
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -204,10 +203,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
# 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
@@ -267,12 +262,9 @@ class AnvatoIE(InfoExtractor):
|
||||
'anvrid': anvrid,
|
||||
'anvts': server_time,
|
||||
}
|
||||
if access_key in self._TOKEN_GENERATORS:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
|
||||
else:
|
||||
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
|
@@ -1,7 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nfl import NFLTokenGenerator
|
||||
|
||||
__all__ = [
|
||||
'NFLTokenGenerator',
|
||||
]
|
@@ -1,6 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class TokenGenerator:
    # Interface for token generators: subclasses provide generate().

    def generate(self, anvack, mcp_id):
        """Always raises NotImplementedError; subclasses must override."""
        message = 'This method must be implemented by subclasses'
        raise NotImplementedError(message)
|
@@ -1,30 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import TokenGenerator
|
||||
|
||||
|
||||
class NFLTokenGenerator(TokenGenerator):
    # Cached "<token_type> <access_token>" header value, shared by all calls
    # once the first reroute request has succeeded.
    _AUTHORIZATION = None

    # generate() is invoked on the class itself with the calling extractor as
    # the first argument (there is no generator instance).  Declaring it a
    # staticmethod makes that call valid on Python 2 as well, where calling a
    # plain method through the class with a foreign object raises TypeError.
    @staticmethod
    def generate(ie, anvack, mcp_id):
        """Return a media token for `mcp_id` from the NFL API.

        `ie` is the extractor whose `_download_json` performs the requests;
        `anvack` and `mcp_id` are interpolated into the GraphQL query.
        Raises KeyError if the responses lack the expected fields.
        """
        if not NFLTokenGenerator._AUTHORIZATION:
            reroute = ie._download_json(
                'https://api.nfl.com/v1/reroute', mcp_id,
                data=b'grant_type=client_credentials',
                headers={'X-Domain-Id': 100})
            NFLTokenGenerator._AUTHORIZATION = '%s %s' % (
                reroute.get('token_type') or 'Bearer', reroute['access_token'])
        return ie._download_json(
            'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
                'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id),
            }).encode(), headers={
                'Authorization': NFLTokenGenerator._AUTHORIZATION,
                'Content-Type': 'application/json',
            })['data']['viewer']['mediaToken']['token']
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
options = self._parse_json(self._search_regex(
|
||||
r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
|
||||
|
||||
formats = []
|
||||
for sources in player['multiSRC']:
|
||||
for sources in (options.get('multiSRC') or []):
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
@@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
info['title'] = get_element_by_id('videoTitle', webpage) or \
|
||||
self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'duration': int_or_none(options.get('duration')),
|
||||
'formats': formats,
|
||||
})
|
||||
|
174
youtube_dl/extractor/arcpublishing.py
Normal file
174
youtube_dl/extractor/arcpublishing.py
Normal file
@@ -0,0 +1,174 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
    # Extractor for internal "arcpublishing:<org>:<uuid>" URLs, which
    # _extract_urls() builds from POWA player elements found in web pages.
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
    _TESTS = [{
        # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
        'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
        'only_matching': True,
    }, {
        # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
        'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
        'only_matching': True,
    }, {
        # https://www.actionnewsjax.com/video/live-stream/
        'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
        'only_matching': True,
    }, {
        # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
        'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
        'only_matching': True,
    }, {
        # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
        'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
        'only_matching': True,
    }, {
        # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
        'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
        'only_matching': True,
    }, {
        # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
        'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
        'only_matching': True,
    }, {
        # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
        'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
        'only_matching': True,
    }, {
        # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
        'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
        'only_matching': True,
    }, {
        # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
        'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
        'only_matching': True,
    }, {
        # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
        'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
        'only_matching': True,
    }, {
        # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
        'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
        'only_matching': True,
    }]
    # (organisation ids, API host template) pairs; organisations not listed
    # here fall back to the default template in _real_extract().
    _POWA_DEFAULTS = [
        (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
        ([
            'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
            'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
            'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
        ], 'video-api-cdn.%s.arcpublishing.com/api'),
    ]

    @staticmethod
    def _extract_urls(webpage):
        """Return an 'arcpublishing:<org>:<uuid>' URL for each POWA div.

        Only <div> tags whose class contains 'powa' and that carry both a
        'data-org' and a 'data-uuid' attribute produce an entry.
        """
        entries = []
        # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
        for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
            powa = extract_attributes(powa_el) or {}
            org = powa.get('data-org')
            uuid = powa.get('data-uuid')
            if org and uuid:
                entries.append('arcpublishing:%s:%s' % (org, uuid))
        return entries

    def _real_extract(self, url):
        """Fetch the video record for <org>/<uuid> and build its info dict.

        Raises IndexError/KeyError when the API returns no record or the
        record has no basic headline.
        """
        org, uuid = re.match(self._VALID_URL, url).groups()
        for orgs, tmpl in self._POWA_DEFAULTS:
            if org in orgs:
                base_api_tmpl = tmpl
                break
        else:
            base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
        # 'wapo' selects its host template above but is sent as 'washpost'.
        if org == 'wapo':
            org = 'washpost'
        video = self._download_json(
            'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
            uuid, query={'uuid': uuid})[0]
        title = video['headlines']['basic']
        is_live = video.get('status') == 'live'

        urls = []
        formats = []
        # `or []` also covers a 'streams' key that is present but null.
        for s in (video.get('streams') or []):
            s_url = s.get('url')
            # Skip streams without a URL and duplicates of one already seen.
            if not s_url or s_url in urls:
                continue
            urls.append(s_url)
            stream_type = s.get('stream_type')
            if stream_type == 'smil':
                smil_formats = self._extract_smil_formats(
                    s_url, uuid, fatal=False)
                for f in smil_formats:
                    if f['url'].endswith('/cfx/st'):
                        f['app'] = 'cfx/st'
                        if not f['play_path'].startswith('mp4:'):
                            f['play_path'] = 'mp4:' + f['play_path']
                        # .get(): a format without 'tbr' is left untouched
                        # instead of raising KeyError.
                        if isinstance(f.get('tbr'), float):
                            f['vbr'] = f['tbr'] * 1000
                            del f['tbr']
                            f['format_id'] = 'rtmp-%d' % f['vbr']
                formats.extend(smil_formats)
            elif stream_type in ('ts', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False)
                # Drop playlists in which no format has audio (this is also
                # true for an empty result).
                if all(f.get('acodec') == 'none' for f in m3u8_formats):
                    continue
                for f in m3u8_formats:
                    if f.get('acodec') == 'none':
                        f['preference'] = -40
                    elif f.get('vcodec') == 'none':
                        f['preference'] = -50
                    height = f.get('height')
                    if not height:
                        continue
                    # The number following the height in the URL is used as
                    # the video bitrate.
                    vbr = self._search_regex(
                        r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
                    if vbr:
                        f['vbr'] = int(vbr)
                formats.extend(m3u8_formats)
            else:
                vbr = int_or_none(s.get('bitrate'))
                formats.append({
                    'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
                    'vbr': vbr,
                    'width': int_or_none(s.get('width')),
                    'height': int_or_none(s.get('height')),
                    'filesize': int_or_none(s.get('filesize')),
                    'url': s_url,
                    'preference': -1,
                })
        self._sort_formats(
            formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))

        subtitles = {}
        # All subtitle URLs are filed under 'en'.
        for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
            subtitle_url = subtitle.get('url')
            if subtitle_url:
                subtitles.setdefault('en', []).append({'url': subtitle_url})

        return {
            'id': uuid,
            'title': self._live_title(title) if is_live else title,
            'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
            'description': try_get(video, lambda x: x['subheadlines']['basic']),
            'formats': formats,
            # NOTE(review): second argument is presumably a scale divisor;
            # confirm 100 matches the unit of the API's 'duration' field.
            'duration': int_or_none(video.get('duration'), 100),
            'timestamp': parse_iso8601(video.get('created_date')),
            'subtitles': subtitles,
            'is_live': is_live,
        }
|
@@ -6,13 +6,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,22 +18,27 @@ class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
video\.arkena\.com/play2/embed/player\?|
|
||||
video\.(?:arkena|qbrick)\.com/play2/embed/player\?|
|
||||
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
|
||||
'md5': '97f117754e5f3c020f5f26da4a44ebaf',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'id': 'd8ab4607-00090107-aab86310',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
'title': 'EM_HT20_117_roslund_v2.mp4',
|
||||
'timestamp': 1608285912,
|
||||
'upload_date': '20201218',
|
||||
'duration': 1429.162667,
|
||||
'subtitles': {
|
||||
'sv': 'count:3',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||
'only_matching': True,
|
||||
@@ -72,62 +75,89 @@ class ArkenaIE(InfoExtractor):
|
||||
if not video_id or not account_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||
media = self._download_json(
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
})
|
||||
metadata = media.get('metadata') or {}
|
||||
title = metadata['title']
|
||||
|
||||
media_info = playlist['MediaInfo']
|
||||
title = media_info['Title']
|
||||
media_files = playlist['MediaFiles']
|
||||
|
||||
is_live = False
|
||||
duration = None
|
||||
formats = []
|
||||
for kind_case, kind_formats in media_files.items():
|
||||
kind = kind_case.lower()
|
||||
for f in kind_formats:
|
||||
f_url = f.get('Url')
|
||||
if not f_url:
|
||||
continue
|
||||
is_live = f.get('Live') == 'true'
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f_url, video_id, f4m_id=kind, fatal=False))
|
||||
elif kind == 'dash' or 'mpd' in exts:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
f_url, video_id, mpd_id=kind, fatal=False))
|
||||
elif kind == 'silverlight':
|
||||
# TODO: process when ism is supported (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8118)
|
||||
continue
|
||||
else:
|
||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||
'tbr': tbr,
|
||||
})
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
for resource in media['asset']['resources']:
|
||||
for rendition in (resource.get('renditions') or []):
|
||||
rendition_type = rendition.get('type')
|
||||
for i, link in enumerate(rendition.get('links') or []):
|
||||
href = link.get('href')
|
||||
if not href:
|
||||
continue
|
||||
if rendition_type == 'image':
|
||||
thumbnails.append({
|
||||
'filesize': int_or_none(rendition.get('size')),
|
||||
'height': int_or_none(rendition.get('height')),
|
||||
'id': rendition.get('id'),
|
||||
'url': href,
|
||||
'width': int_or_none(rendition.get('width')),
|
||||
})
|
||||
elif rendition_type == 'subtitle':
|
||||
subtitles.setdefault(rendition.get('language') or 'en', []).append({
|
||||
'url': href,
|
||||
})
|
||||
elif rendition_type == 'video':
|
||||
f = {
|
||||
'filesize': int_or_none(rendition.get('size')),
|
||||
'format_id': rendition.get('id'),
|
||||
'url': href,
|
||||
}
|
||||
video = try_get(rendition, lambda x: x['videos'][i], dict)
|
||||
if video:
|
||||
if not duration:
|
||||
duration = float_or_none(video.get('duration'))
|
||||
f.update({
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'vcodec': video.get('codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
})
|
||||
audio = try_get(video, lambda x: x['audios'][0], dict)
|
||||
if audio:
|
||||
f.update({
|
||||
'acodec': audio.get('codec'),
|
||||
'asr': int_or_none(audio.get('sampleRate')),
|
||||
})
|
||||
formats.append(f)
|
||||
elif rendition_type == 'index':
|
||||
mime_type = link.get('mimeType')
|
||||
if mime_type == 'application/smil+xml':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
href, video_id, fatal=False))
|
||||
elif mime_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif mime_type == 'application/hds+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media_info.get('Description')
|
||||
video_id = media_info.get('VideoId') or video_id
|
||||
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||
thumbnails = [{
|
||||
'url': thumbnail['Url'],
|
||||
'width': int_or_none(thumbnail.get('Size')),
|
||||
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'description': metadata.get('description'),
|
||||
'timestamp': parse_iso8601(media.get('created')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'duration': duration,
|
||||
'tags': media.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
_NETRC_MACHINE = 'bbc'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
_MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
|
||||
_MEDIA_SETS = [
|
||||
# Provides HQ HLS streams with even better quality than pc mediaset but fails
|
||||
# with geolocation in some cases even when it's not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
'iptv-all',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
_EMP_PLAYLIST_NS,
|
||||
)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
def _findall_ns(self, element, xpath):
|
||||
elements = []
|
||||
for ns in self._NAMESPACES:
|
||||
elements.extend(element.findall(xpath % ns))
|
||||
return elements
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
||||
if error is None:
|
||||
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
||||
if error is not None:
|
||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||
return self._findall_ns(media_selection, './{%s}media')
|
||||
error = media_selection.get('result')
|
||||
if error:
|
||||
raise BBCCoUkIE.MediaSelectionError(error)
|
||||
return media_selection.get('media') or []
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return self._findall_ns(media, './{%s}connection')
|
||||
return media.get('connection') or []
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, compat_etree_Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
subtitles['en'] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
]
|
||||
break
|
||||
return subtitles
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
|
||||
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
last_exception = None
|
||||
for mediaselector_url in self._MEDIASELECTOR_URLS:
|
||||
for media_set in self._MEDIA_SETS:
|
||||
try:
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||
last_exception = e
|
||||
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
media_selection = self._download_json(
|
||||
url, programme_id, 'Downloading media selection JSON',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if kind in ('video', 'audio'):
|
||||
bitrate = int_or_none(media.get('bitrate'))
|
||||
encoding = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
supplier = connection.get('supplier')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
format_id = supplier or conn_kind or protocol
|
||||
if service:
|
||||
format_id = '%s_%s' % (service, format_id)
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not service and not supplier and bitrate:
|
||||
if not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
error = self._search_regex(
|
||||
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||
r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
||||
# even not geo restricted at all
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||
# notukerror for non UK (?) users (e.g.
|
||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
|
||||
# Provides fewer formats, but works everywhere for everybody (hopefully)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
|
||||
_MEDIA_SETS = [
|
||||
'mobile-tablet-main',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
|
60
youtube_dl/extractor/bongacams.py
Normal file
60
youtube_dl/extractor/bongacams.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://de.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://cn.bongacams.com/azumi-8',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
channel_id = mobj.group('id')
|
||||
|
||||
amf = self._download_json(
|
||||
'https://%s/tools/amf.php' % host, channel_id,
|
||||
data=urlencode_postdata((
|
||||
('method', 'getRoomData'),
|
||||
('args[]', channel_id),
|
||||
('args[]', 'false'),
|
||||
)), headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
|
||||
server_url = amf['localData']['videoServerUrl']
|
||||
|
||||
uploader_id = try_get(
|
||||
amf, lambda x: x['performerData']['username'], compat_str) or channel_id
|
||||
uploader = try_get(
|
||||
amf, lambda x: x['performerData']['displayName'], compat_str)
|
||||
like_count = int_or_none(try_get(
|
||||
amf, lambda x: x['performerData']['loversCount']))
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
|
||||
channel_id, 'mp4', m3u8_id='hls', live=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(uploader or uploader_id),
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': like_count,
|
||||
'age_limit': 18,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
@@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
num_drm_sources = 0
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
sources = json_data.get('sources') or []
|
||||
for source in sources:
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
if container == 'WVM' or source.get('key_systems'):
|
||||
num_drm_sources += 1
|
||||
continue
|
||||
elif ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if not formats:
|
||||
errors = json_data.get('errors')
|
||||
if errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if sources and num_drm_sources == len(sources):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -600,24 +601,27 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
config, lambda x: x['video_cloud']['policy_key'])
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
webpage = self._download_webpage(
|
||||
base_url + 'index.min.js', video_id)
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
@@ -11,7 +11,47 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mcp_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||
|
||||
|
||||
class CBSLocalArticleIE(AnvatoIE):
|
||||
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE):
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
|
@@ -2605,6 +2605,13 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
signed = 'hdnea=' in manifest_url
|
||||
if not signed:
|
||||
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
|
||||
manifest_url = re.sub(
|
||||
r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
|
||||
'', manifest_url).strip('?')
|
||||
|
||||
formats = []
|
||||
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
@@ -2630,7 +2637,7 @@ class InfoExtractor(object):
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
|
||||
if http_host and m3u8_formats and not signed:
|
||||
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
|
52
youtube_dl/extractor/ctv.py
Normal file
52
youtube_dl/extractor/ctv.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
|
||||
'info_dict': {
|
||||
'id': '2102249',
|
||||
'ext': 'flv',
|
||||
'title': 'Wednesday, December 23, 2020',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
|
||||
'timestamp': 1608732000,
|
||||
'upload_date': '20201223',
|
||||
'series': 'Your Morning',
|
||||
'season': '2020-2021',
|
||||
'season_number': 5,
|
||||
'episode_number': 88,
|
||||
'tags': ['Your Morning'],
|
||||
'categories': ['Talk Show'],
|
||||
'duration': 7467.126,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://www.ctv.ca/space-graphql/graphql', display_id, query={
|
||||
'query': '''{
|
||||
resolvedPath(path: "/%s") {
|
||||
lastSegment {
|
||||
content {
|
||||
... on AxisContent {
|
||||
axisId
|
||||
videoPlayerDestCode
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % display_id,
|
||||
})['data']['resolvedPath']['lastSegment']['content']
|
||||
video_id = content['axisId']
|
||||
return self.url_result(
|
||||
'9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id),
|
||||
'NineCNineMedia', video_id)
|
@@ -1,77 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class EveryonesMixtapeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
|
||||
'info_dict': {
|
||||
'id': '5bfseWNmlds',
|
||||
'ext': 'mp4',
|
||||
'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
|
||||
'uploader': 'FKR.TV',
|
||||
'uploader_id': 'frenchkissrecords',
|
||||
'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
|
||||
'upload_date': '20081015'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # This is simply YouTube
|
||||
}
|
||||
}, {
|
||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
|
||||
'info_dict': {
|
||||
'id': 'm7m0jJAbMQi',
|
||||
'title': 'Driving',
|
||||
},
|
||||
'playlist_count': 24
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
|
||||
pllist_req = sanitized_Request(pllist_url)
|
||||
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
|
||||
playlist_list = self._download_json(
|
||||
pllist_req, playlist_id, note='Downloading playlist metadata')
|
||||
try:
|
||||
playlist_no = next(playlist['id']
|
||||
for playlist in playlist_list
|
||||
if playlist['code'] == playlist_id)
|
||||
except StopIteration:
|
||||
raise ExtractorError('Playlist id not found')
|
||||
|
||||
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
|
||||
pl_req = sanitized_Request(pl_url)
|
||||
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
playlist = self._download_json(
|
||||
pl_req, playlist_id, note='Downloading playlist info')
|
||||
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'url': t['url'],
|
||||
'title': t['title'],
|
||||
} for t in playlist['tracks']]
|
||||
|
||||
if mobj.group('songnr'):
|
||||
songnr = int(mobj.group('songnr')) - 1
|
||||
return entries[songnr]
|
||||
|
||||
playlist_title = playlist['mixData']['name']
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': playlist_title,
|
||||
'entries': entries,
|
||||
}
|
@@ -33,6 +33,8 @@ from .aenetworks import (
|
||||
AENetworksCollectionIE,
|
||||
AENetworksShowIE,
|
||||
HistoryTopicIE,
|
||||
HistoryPlayerIE,
|
||||
BiographyIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
@@ -54,6 +56,7 @@ from .appletrailers import (
|
||||
AppleTrailersSectionIE,
|
||||
)
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
@@ -119,6 +122,7 @@ from .bleacherreport import (
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .bpb import BpbIE
|
||||
@@ -163,7 +167,10 @@ from .cbc import (
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
@@ -241,6 +248,7 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
@@ -327,7 +335,6 @@ from .espn import (
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
@@ -395,7 +402,6 @@ from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamespot import GameSpotIE
|
||||
@@ -501,7 +507,6 @@ from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
@@ -679,9 +684,15 @@ from .nationalgeographic import (
|
||||
NationalGeographicTVIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nba import (
|
||||
NBAWatchEmbedIE,
|
||||
NBAWatchIE,
|
||||
NBAWatchCollectionIE,
|
||||
NBAEmbedIE,
|
||||
NBAIE,
|
||||
NBAChannelIE,
|
||||
)
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
@@ -779,6 +790,7 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKRadioPodkastIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
@@ -1042,6 +1054,7 @@ from .skynewsarabia import (
|
||||
from .sky import (
|
||||
SkyNewsIE,
|
||||
SkySportsIE,
|
||||
SkySportsNewsIE,
|
||||
)
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
@@ -1123,7 +1136,6 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
@@ -1416,7 +1428,10 @@ from .vshare import VShareIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
VVVVIDShowIE,
|
||||
)
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
@@ -1447,7 +1462,10 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wistia import WistiaIE
|
||||
from .wistia import (
|
||||
WistiaIE,
|
||||
WistiaPlaylistIE,
|
||||
)
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -8,6 +9,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/
|
||||
groups/[^/]+/permalink/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
@@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||
'info_dict': {
|
||||
'id': '211641140192478',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
'graphURI': '/api/graphql/'
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
replay_data = extract_relay_data(_filter)
|
||||
for require in (replay_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||
@@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
graphql_data = self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
|
||||
webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
|
||||
for require in (graphql_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
entries = []
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
|
||||
data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
@@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
post_data = {
|
||||
'doc_id': 3731964053542869,
|
||||
'variables': json.dumps({
|
||||
'livingRoomID': video_id,
|
||||
}),
|
||||
}
|
||||
|
||||
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||
if prefetched_data:
|
||||
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||
if lsd:
|
||||
post_data[lsd['name']] = lsd['value']
|
||||
|
||||
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||
for define in (relay_data.get('define') or []):
|
||||
if define[0] == 'RelayAPIConfigDefaults':
|
||||
self._api_config = define[2]
|
||||
|
||||
living_room = self._download_json(
|
||||
urljoin(url, self._api_config['graphURI']), video_id,
|
||||
data=urlencode_postdata(post_data))['data']['living_room']
|
||||
|
||||
entries = []
|
||||
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||
v_id = video.get('id')
|
||||
if not v_id:
|
||||
continue
|
||||
v_id = compat_str(v_id)
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||
self.ie_key(), v_id, video.get('name')))
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
# Video info not in first request, do a secondary request using
|
||||
# tahoe player specific URL
|
||||
tahoe_data = self._download_webpage(
|
||||
|
@@ -1,77 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fxnetworks.com/video/1032565827847',
|
||||
'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
|
||||
'info_dict': {
|
||||
'id': 'dRzwHC_MMqIv',
|
||||
'ext': 'mp4',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
|
||||
'age_limit': 14,
|
||||
'uploader': 'NEWA-FNG-FX',
|
||||
'upload_date': '20170825',
|
||||
'timestamp': 1503686274,
|
||||
'episode_number': 0,
|
||||
'season_number': 2,
|
||||
'series': 'Better Things',
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.simpsonsworld.com/video/716094019682',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if 'The content you are trying to access is not available in your region.' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data'))
|
||||
player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)
|
||||
release_url = video_data['rel']
|
||||
title = video_data['data-title']
|
||||
rating = video_data.get('data-rating')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if player_type == 'movies':
|
||||
query.update({
|
||||
'manifest': 'm3u',
|
||||
})
|
||||
else:
|
||||
query.update({
|
||||
'switch': 'http',
|
||||
})
|
||||
if video_data.get('data-req-auth') == '1':
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data['data-channel'], title,
|
||||
video_data.get('data-guid'), rating)
|
||||
query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
|
||||
'series': video_data.get('data-show-title'),
|
||||
'episode_number': int_or_none(video_data.get('data-episode')),
|
||||
'season_number': int_or_none(video_data.get('data-season')),
|
||||
'thumbnail': video_data.get('data-large-thumb'),
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@@ -35,6 +35,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
@@ -66,7 +67,10 @@ from .tube8 import Tube8IE
|
||||
from .mofosex import MofosexEmbedIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .youporn import YouPornIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
@@ -123,6 +127,7 @@ from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -217,6 +222,33 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
},
|
||||
# RSS feed with item with description and thumbnails
|
||||
{
|
||||
'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'info_dict': {
|
||||
'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'title': 're:.*100% Hydrogen.*',
|
||||
'description': 're:.*In this episode.*',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'm4a',
|
||||
'id': 'c1c879525ce2cb640b344507e682c36d',
|
||||
'title': 're:Hydrogen!',
|
||||
'description': 're:.*In this episode we are going.*',
|
||||
'timestamp': 1567977776,
|
||||
'upload_date': '20190908',
|
||||
'duration': 459,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosures and unsupported link URLs
|
||||
{
|
||||
'url': 'http://www.hellointernet.fm/podcast?format=rss',
|
||||
@@ -1996,22 +2028,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
|
||||
'info_dict': {
|
||||
'id': 'uPDB5I9wfp8',
|
||||
'ext': 'webm',
|
||||
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
|
||||
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
|
||||
'upload_date': '20160219',
|
||||
'uploader': 'Pocoyo - Português (BR)',
|
||||
'uploader_id': 'PocoyoBrazil',
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
|
||||
'info_dict': {
|
||||
@@ -2181,7 +2197,32 @@ class GenericIE(InfoExtractor):
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
{
|
||||
# VHX Embed
|
||||
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
|
||||
'info_dict': {
|
||||
'id': '858208',
|
||||
'ext': 'mp4',
|
||||
'title': 'Untitled',
|
||||
'uploader_id': 'user80538407',
|
||||
'uploader': 'OTT Videos',
|
||||
},
|
||||
},
|
||||
{
|
||||
# ArcPublishing PoWa video player
|
||||
'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
|
||||
'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
|
||||
'info_dict': {
|
||||
'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'Senate candidates wave to voters on Anchorage streets',
|
||||
'description': 'md5:91f51a6511f090617353dc720318b20e',
|
||||
'timestamp': 1604378735,
|
||||
'upload_date': '20201103',
|
||||
'duration': 1581,
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2218,10 +2259,10 @@ class GenericIE(InfoExtractor):
|
||||
default=None)
|
||||
|
||||
duration = itunes('duration')
|
||||
explicit = itunes('explicit')
|
||||
if explicit == 'true':
|
||||
explicit = (itunes('explicit') or '').lower()
|
||||
if explicit in ('true', 'yes'):
|
||||
age_limit = 18
|
||||
elif explicit == 'false':
|
||||
elif explicit in ('false', 'no'):
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = None
|
||||
@@ -2234,7 +2275,7 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': unified_timestamp(
|
||||
xpath_text(it, 'pubDate', default=None)),
|
||||
'duration': int_or_none(duration) or parse_duration(duration),
|
||||
'thumbnail': url_or_none(itunes('image')),
|
||||
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
|
||||
'episode': itunes('title'),
|
||||
'episode_number': int_or_none(itunes('episode')),
|
||||
'season_number': int_or_none(itunes('season')),
|
||||
@@ -2548,6 +2589,10 @@ class GenericIE(InfoExtractor):
|
||||
if tp_urls:
|
||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||
|
||||
arc_urls = ArcPublishingIE._extract_urls(webpage)
|
||||
if arc_urls:
|
||||
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2559,6 +2604,10 @@ class GenericIE(InfoExtractor):
|
||||
if vimeo_urls:
|
||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||
|
||||
vhx_url = VHXEmbedIE._extract_url(webpage)
|
||||
if vhx_url:
|
||||
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
|
@@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'resource_id': 'Disney',
|
||||
}
|
||||
},
|
||||
'fxnow.fxnetworks': {
|
||||
'brand': '025',
|
||||
'requestor_id': 'dtci',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?P<sub_domain>%s)\.)?go|
|
||||
(?P<sub_domain_2>abc|freeform|disneynow)
|
||||
(?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
@@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||
'info_dict': {
|
||||
'id': 'VDKA12782841',
|
||||
'ext': 'mp4',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
|
@@ -22,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'uploader': 'B E A U T Y F O R A S H E S',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
(media, video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
width) = [None] * 12
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -137,59 +140,77 @@ class InstagramIE(InfoExtractor):
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||
if not media:
|
||||
additional_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'additional data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if additional_data:
|
||||
media = try_get(
|
||||
additional_data, lambda x: x['graphql']['shortcode_media'],
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
||||
def get_count(key, kind):
|
||||
return int_or_none(try_get(
|
||||
def get_count(keys, kind):
|
||||
if not isinstance(keys, (list, tuple)):
|
||||
keys = [keys]
|
||||
for key in keys:
|
||||
count = int_or_none(try_get(
|
||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count('to_comment', 'comment')
|
||||
if count is not None:
|
||||
return count
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count(
|
||||
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
@@ -1,97 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
srt_subtitles_timecode,
|
||||
)
|
||||
|
||||
|
||||
class KanalPlayIE(InfoExtractor):
|
||||
IE_DESC = 'Kanal 5/9/11 Play'
|
||||
_VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
|
||||
'info_dict': {
|
||||
'id': '3270012277',
|
||||
'ext': 'flv',
|
||||
'title': 'Saknar både dusch och avlopp',
|
||||
'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
|
||||
'duration': 2636.36,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _fix_subtitles(self, subs):
|
||||
return '\r\n\r\n'.join(
|
||||
'%s\r\n%s --> %s\r\n%s'
|
||||
% (
|
||||
num,
|
||||
srt_subtitles_timecode(item['startMillis'] / 1000.0),
|
||||
srt_subtitles_timecode(item['endMillis'] / 1000.0),
|
||||
item['text'],
|
||||
) for num, item in enumerate(subs, 1))
|
||||
|
||||
def _get_subtitles(self, channel_id, video_id):
|
||||
subs = self._download_json(
|
||||
'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
|
||||
video_id, 'Downloading subtitles JSON', fatal=False)
|
||||
return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
channel_id = mobj.group('channel_id')
|
||||
|
||||
video = self._download_json(
|
||||
'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
|
||||
video_id)
|
||||
|
||||
reasons_for_no_streams = video.get('reasonsForNoStreams')
|
||||
if reasons_for_no_streams:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
|
||||
expected=True)
|
||||
|
||||
title = video['title']
|
||||
description = video.get('description')
|
||||
duration = float_or_none(video.get('length'), 1000)
|
||||
thumbnail = video.get('posterUrl')
|
||||
|
||||
stream_base_url = video['streamBaseUrl']
|
||||
|
||||
formats = [{
|
||||
'url': stream_base_url,
|
||||
'play_path': stream['source'],
|
||||
'ext': 'flv',
|
||||
'tbr': float_or_none(stream.get('bitrate'), 1000),
|
||||
'rtmp_real_time': True,
|
||||
} for stream in video['streams']]
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
if video.get('hasSubtitle'):
|
||||
subtitles = self.extract_subtitles(channel_id, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -1,15 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .telecinco import TelecincoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
class MiTeleIE(TelecincoIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
@@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
@@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor):
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video = pre_player['video']
|
||||
video_id = video['dataMediaId']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
info = content.get('info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
# for some reason only HLS is supported
|
||||
'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
|
||||
'id': video_id,
|
||||
video_info.update({
|
||||
'title': title,
|
||||
'description': info.get('synopsis'),
|
||||
'series': content.get('title'),
|
||||
@@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'episode': content.get('subtitle'),
|
||||
'episode_number': int_or_none(info.get('episode_number')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'thumbnail': video.get('dataPoster'),
|
||||
'age_limit': int_or_none(info.get('rating')),
|
||||
'timestamp': parse_iso8601(pre_player.get('publishedTime')),
|
||||
}
|
||||
})
|
||||
return video_info
|
||||
|
@@ -5,33 +5,137 @@ import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
OnDemandPagedList,
|
||||
remove_start,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
class NBACVPBaseIE(TurnerBaseIE):
|
||||
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
|
||||
return self._extract_cvp_info(
|
||||
'http://secure.nba.com/%s' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
}, fatal=fatal)
|
||||
|
||||
|
||||
class NBAWatchBaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'
|
||||
|
||||
def _extract_video(self, filter_key, filter_value):
|
||||
video = self._download_json(
|
||||
'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
|
||||
filter_value, query={
|
||||
'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
|
||||
'q': filter_key + ':' + filter_value,
|
||||
'wt': 'json',
|
||||
})['response']['docs'][0]
|
||||
|
||||
video_id = str(video['pid'])
|
||||
title = video['name']
|
||||
|
||||
formats = []
|
||||
m3u8_url = (self._download_json(
|
||||
'https://watch.nba.com/service/publishpoint', video_id, query={
|
||||
'type': 'video',
|
||||
'format': 'json',
|
||||
'id': video_id,
|
||||
}, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
|
||||
}, fatal=False) or {}).get('path')
|
||||
if m3u8_url:
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
for f in m3u8_formats:
|
||||
http_f = f.copy()
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': http_f['url'].replace('.m3u8', ''),
|
||||
})
|
||||
formats.append(http_f)
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('runtime')),
|
||||
'timestamp': parse_iso8601(video.get('releaseDate')),
|
||||
'tags': video.get('tags'),
|
||||
}
|
||||
|
||||
seo_name = video.get('seoName')
|
||||
if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
|
||||
base_path = ''
|
||||
if seo_name.startswith('teams/'):
|
||||
base_path += seo_name.split('/')[1] + '/'
|
||||
base_path += 'video/'
|
||||
cvp_info = self._extract_nba_cvp_info(
|
||||
base_path + seo_name + '.xml', video_id, False)
|
||||
if cvp_info:
|
||||
formats.extend(cvp_info['formats'])
|
||||
info = merge_dicts(info, cvp_info)
|
||||
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
return info
|
||||
|
||||
|
||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||
IENAME = 'nba:watch:embed'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://watch.nba.com/embed?id=659395',
|
||||
'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
|
||||
'info_dict': {
|
||||
'id': '659395',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
|
||||
'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
|
||||
'timestamp': 1492228800,
|
||||
'upload_date': '20170415',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_video('pid', video_id)
|
||||
|
||||
|
||||
class NBAWatchIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap',
|
||||
'id': '70946',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
'timestamp': 1354638466,
|
||||
'timestamp': 1354597200,
|
||||
'upload_date': '20121204',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
@@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE):
|
||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||
'info_dict': {
|
||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'id': '330865',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hawks vs. Cavaliers Game 1',
|
||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||
'duration': 228,
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160216',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'timberwolves',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'params': {
|
||||
# Download the whole playlist takes too long time
|
||||
'playlist_items': '1-30',
|
||||
'timestamp': 1432094400,
|
||||
'upload_date': '20150521',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
'upload_date': '20141212',
|
||||
'timestamp': 1418418600,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only CVP mp4 format available
|
||||
'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
|
||||
if collection_id:
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
|
||||
else:
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
|
||||
return self.url_result(
|
||||
'https://www.nba.com/watch/list/collection/' + collection_id,
|
||||
NBAWatchCollectionIE.ie_key(), collection_id)
|
||||
return self._extract_video('seoName', display_id)
|
||||
|
||||
def _fetch_page(self, team, video_id, page):
|
||||
search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
|
||||
'type': 'teamvideo',
|
||||
'start': page * self._PAGE_SIZE + 1,
|
||||
'npp': (page + 1) * self._PAGE_SIZE + 1,
|
||||
'sort': 'recent',
|
||||
'output': 'json',
|
||||
'site': team,
|
||||
})
|
||||
results = self._download_json(
|
||||
search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
|
||||
for item in results:
|
||||
yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
|
||||
|
||||
def _extract_playlist(self, orig_path, video_id, webpage):
|
||||
team = orig_path.split('/')[0]
|
||||
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch:collection'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.nba.com/list/collection/season-preview-2020',
|
||||
'info_dict': {
|
||||
'id': 'season-preview-2020',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video because of --no-playlist')
|
||||
video_path = self._search_regex(
|
||||
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
|
||||
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
|
||||
return self.url_result(video_url)
|
||||
|
||||
self.to_screen('Downloading playlist - add --no-playlist to just download video')
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
def _fetch_page(self, collection_id, page):
    """Yield url-type entries for one page of a watch collection.

    The incoming page index is incremented by one before it is sent to
    the content API. Videos without a 'seoName' or 'slug' in their
    'program' object are skipped.
    """
    page_number = page + 1
    response = self._download_json(
        'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
        collection_id, 'Downloading page %d JSON metadata' % page_number,
        query={
            'count': self._PAGE_SIZE,
            'page': page_number,
        })
    for video in response['results']['videos']:
        program = video.get('program') or {}
        seo_name = program.get('seoName') or program.get('slug')
        if seo_name:
            # Program-level metadata wins; the video object is the fallback.
            yield {
                '_type': 'url',
                'id': program.get('id'),
                'title': program.get('title') or video.get('title'),
                'url': 'https://www.nba.com/watch/video/' + seo_name,
                'thumbnail': video.get('image'),
                'description': program.get('description') or video.get('description'),
                'duration': parse_duration(program.get('runtimeHours')),
                'timestamp': parse_iso8601(video.get('releaseDate')),
            }
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
orig_path = path
|
||||
if path.startswith('nba/'):
|
||||
path = path[3:]
|
||||
collection_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, collection_id),
|
||||
self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, collection_id)
|
||||
|
||||
if 'video/' not in path:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
||||
|
||||
if path == '{{id}}':
|
||||
return self._extract_playlist(orig_path, video_id, webpage)
|
||||
class NBABaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'''(?x)
|
||||
https?://(?:www\.)?nba\.com/
|
||||
(?P<team>
|
||||
blazers|
|
||||
bucks|
|
||||
bulls|
|
||||
cavaliers|
|
||||
celtics|
|
||||
clippers|
|
||||
grizzlies|
|
||||
hawks|
|
||||
heat|
|
||||
hornets|
|
||||
jazz|
|
||||
kings|
|
||||
knicks|
|
||||
lakers|
|
||||
magic|
|
||||
mavericks|
|
||||
nets|
|
||||
nuggets|
|
||||
pacers|
|
||||
pelicans|
|
||||
pistons|
|
||||
raptors|
|
||||
rockets|
|
||||
sixers|
|
||||
spurs|
|
||||
suns|
|
||||
thunder|
|
||||
timberwolves|
|
||||
warriors|
|
||||
wizards
|
||||
)
|
||||
(?:/play\#)?/'''
|
||||
_CHANNEL_PATH_REGEX = r'video/channel|series'
|
||||
|
||||
# See prepareContentId() of pkgCvp.js
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
def _embed_url_result(self, team, content_id):
    """Return a url result pointing at the secure.nba.com iframe player.

    The content id and team are passed as the 'contentId' and 'team'
    query parameters, and the result is routed to NBAEmbedIE.
    """
    embed_query = {
        'contentId': content_id,
        'team': team,
    }
    embed_url = update_url_query(
        'https://secure.nba.com/assets/amp/include/video/iframe.html',
        embed_query)
    return self.url_result(embed_url, NBAEmbedIE.ie_key())
|
||||
|
||||
return self._extract_cvp_info(
|
||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
def _call_api(self, team, content_id, query, resource):
    """Query the api.nba.net video endpoint for `team`.

    `content_id` is only used as the id shown in progress output and
    `resource` names the thing being fetched in the status message.
    Returns the 'result' member of the 'response' object.
    """
    api_url = 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team
    note = 'Download %s JSON metadata' % resource
    api_headers = {
        'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
    }
    response = self._download_json(
        api_url, content_id, note, query=query, headers=api_headers)
    return response['response']['result']
|
||||
|
||||
def _extract_video(self, video, team, extract_all=True):
    """Build an info dict from one video object returned by the team API.

    With `extract_all` true, formats are gathered from the 'mp4',
    'videoSource', 'm3u8' and 'contentXml' fields and then sorted.
    Otherwise only the 'mp4' format is kept and the result of
    _embed_url_result() is merged into the dict instead.
    """
    video_id = compat_str(video['nid'])
    # The brand reported by the API replaces the team passed by the caller.
    team = video['brand']

    info = {
        'id': video_id,
        'title': video.get('title') or video.get('headline') or video['shortHeadline'],
        'description': video.get('description'),
        'timestamp': parse_iso8601(video.get('published')),
    }

    # Every sidecar caption URL is filed under the 'en' language.
    sidecars = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
    subtitles = {}
    for sidecar_url in sidecars.values():
        subtitles.setdefault('en', []).append({'url': sidecar_url})

    formats = []
    progressive_url = video.get('mp4')
    if progressive_url:
        formats.append({'url': progressive_url})

    if not extract_all:
        info.update(self._embed_url_result(team, video['videoId']))
    else:
        source_url = video.get('videoSource')
        # s3:// sources are skipped; other URLs are probed before use.
        if source_url and not source_url.startswith('s3://'):
            if self._is_valid_url(source_url, video_id, 'source'):
                formats.append({
                    'format_id': 'source',
                    'url': source_url,
                    'preference': 1,
                })

        hls_url = video.get('m3u8')
        if hls_url:
            if '.akamaihd.net/i/' in hls_url:
                hls_formats = self._extract_akamai_formats(
                    hls_url, video_id, {'http': 'pmd.cdn.turner.com'})
            else:
                hls_formats = self._extract_m3u8_formats(
                    hls_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)

        content_xml = video.get('contentXml')
        if team and content_xml:
            cvp_info = self._extract_nba_cvp_info(
                team + content_xml, video_id, fatal=False)
            if cvp_info:
                formats.extend(cvp_info['formats'])
                subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
                info = merge_dicts(info, cvp_info)

        self._sort_formats(formats)

    info.update({
        'formats': formats,
        'subtitles': subtitles,
    })
    return info
|
||||
|
||||
def _real_extract(self, url):
    """Resolve the content id for `url` and hand it to _extract_url_results().

    For '/play#/' URLs the id captured by _VALID_URL is URL-unquoted and
    used directly; for any other URL the page is downloaded and the id is
    taken from the value following the subclass' _CONTENT_ID_REGEX key.
    """
    team, url_id = re.match(self._VALID_URL, url).groups()
    if '/play#/' not in url:
        webpage = self._download_webpage(url, url_id)
        content_id = self._search_regex(
            self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
    else:
        content_id = compat_urllib_parse_unquote(url_id)
    return self._extract_url_results(team, content_id)
|
||||
|
||||
|
||||
class NBAEmbedIE(NBABaseIE):
    """Extractor for secure.nba.com iframe/topIframe embed player URLs."""

    # Was spelled `IENAME`; the sibling extractors use `IE_NAME`, so the
    # misspelled attribute never took effect.
    IE_NAME = 'nba:embed'
    _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
        'only_matching': True,
    }, {
        'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the embedded video, or delegate to NBAWatchIE.

        When the query string carries no usable 'team' value the content
        id is forwarded as a watch.nba.com video URL; otherwise the team
        API is queried and its first result is extracted.
        """
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        content_id = qs['contentId'][0]
        team = qs.get('team', [None])[0]
        if not team:
            return self.url_result(
                'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
        video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
        return self._extract_video(video, team)
|
||||
|
||||
|
||||
class NBAIE(NBABaseIE):
    """Extractor for single videos on nba.com team sites."""

    # Was spelled `IENAME`; the sibling extractors use `IE_NAME`, so the
    # misspelled attribute never took effect.
    IE_NAME = 'nba'
    # The negative lookahead keeps channel/series URLs out of this extractor.
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
        'info_dict': {
            'id': '45039',
            'ext': 'mp4',
            'title': 'AND WE BACK.',
            'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
            'duration': 94,
            'timestamp': 1607112000,
            'upload_date': '20201218',
        },
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
        'only_matching': True,
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
        'only_matching': True,
    }]
    # Key whose quoted value holds the content id in the downloaded page.
    _CONTENT_ID_REGEX = r'videoID'

    def _extract_url_results(self, team, content_id):
        """Return a url result for the embed player of a single video."""
        return self._embed_url_result(team, content_id)
|
||||
|
||||
|
||||
class NBAChannelIE(NBABaseIE):
    """Extractor for video channels and series on nba.com team sites."""

    # Was spelled `IENAME`; the sibling extractors use `IE_NAME`, so the
    # misspelled attribute never took effect.
    IE_NAME = 'nba:channel'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/blazers/video/channel/summer_league',
        'info_dict': {
            'title': 'Summer League',
        },
        'playlist_mincount': 138,
    }, {
        'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
        'only_matching': True,
    }]
    # Key whose quoted value holds the channel name in the downloaded page.
    _CONTENT_ID_REGEX = r'videoSubCategory'
    _PAGE_SIZE = 100

    def _fetch_page(self, team, channel, page):
        """Yield one entry per video on the given page of `channel`.

        The API is addressed by offset (page * _PAGE_SIZE); the status
        message shows the page number starting from one.
        """
        results = self._call_api(team, channel, {
            'channels': channel,
            'count': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
        }, 'page %d' % (page + 1))
        for video in results:
            # extract_all=False: entries are resolved through the embed URL.
            yield self._extract_video(video, team, False)

    def _extract_url_results(self, team, content_id):
        """Return a lazily paged playlist titled after the channel."""
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, team, content_id),
            self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_title=content_id)
|
||||
|
@@ -158,7 +158,8 @@ class NBCIE(AdobePassIE):
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||
@@ -174,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
iframe_m = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||
r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
|
||||
if iframe_m:
|
||||
return iframe_m.group('url')
|
||||
|
||||
@@ -192,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
# Does not include https because its certificate is invalid
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# iframe src
|
||||
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||
'info_dict': {
|
||||
'id': 'PHJSaFWbrTY9',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'upload_date': '20150330',
|
||||
'timestamp': 1427726529,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# data-mpx-src
|
||||
'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data-src
|
||||
'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -274,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE):
|
||||
}
|
||||
|
||||
|
||||
class CSNNEIE(InfoExtractor):
    """Extractor for csnne.com video pages, delegating to ThePlatform."""

    _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
        'info_dict': {
            'id': 'yvBLLUgQ8WU0',
            'ext': 'mp4',
            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
            'timestamp': 1459369979,
            'upload_date': '20160330',
            'uploader': 'NBCU-SPORTS',
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the page's player stream.

        The stream URL is read from the 'twitter:player:stream' meta tag
        of the downloaded page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        stream_url = self._html_search_meta('twitter:player:stream', webpage)
        result = {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': stream_url,
            'display_id': display_id,
        }
        return result
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||
|
||||
|
@@ -90,7 +90,7 @@ class NhkVodIE(NhkBaseIE):
|
||||
_TESTS = [{
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||
'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
|
||||
'info_dict': {
|
||||
'id': 'a95j5iza',
|
||||
'ext': 'mp4',
|
||||
|
@@ -1,20 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import datetime
|
||||
import functools
|
||||
import json
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
@@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor):
|
||||
if urlh is False:
|
||||
login_ok = False
|
||||
else:
|
||||
parts = compat_urlparse.urlparse(urlh.geturl())
|
||||
parts = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||
login_ok = False
|
||||
if not login_ok:
|
||||
@@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
|
||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||
flv_info = compat_parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
@@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
|
||||
class NiconicoPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||
'info_dict': {
|
||||
'id': '27411728',
|
||||
'title': 'AKB48のオールナイトニッポン',
|
||||
'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
|
||||
'uploader': 'のっく',
|
||||
'uploader_id': '805442',
|
||||
},
|
||||
'playlist_mincount': 225,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _call_api(self, list_id, resource, query):
    """Fetch the nvapi mylist object for `list_id`.

    `resource` names what is being downloaded in the status message and
    `query` is sent as the request's query parameters. Returns the
    'mylist' member of the response's 'data' object.
    """
    return self._download_json(
        'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
        # Status message previously misspelled "metadata" as "metatdata".
        'Downloading %s JSON metadata' % resource, query=query,
        headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||
|
||||
def _parse_owner(self, item):
|
||||
owner = item.get('owner') or {}
|
||||
if owner:
|
||||
return {
|
||||
'uploader': owner.get('name'),
|
||||
'uploader_id': owner.get('id'),
|
||||
}
|
||||
return {}
|
||||
|
||||
def _fetch_page(self, list_id, page):
    """Yield url-type entries for one page of a mylist.

    The incoming page index is incremented by one before it is sent to
    the API. Items without a video id are skipped.
    """
    page_number = page + 1
    mylist_page = self._call_api(list_id, 'page %d' % page_number, {
        'page': page_number,
        'pageSize': self._PAGE_SIZE,
    })
    for item in mylist_page['items']:
        video = item.get('video') or {}
        video_id = video.get('id')
        if not video_id:
            continue
        counters = video.get('count') or {}
        entry = {
            '_type': 'url',
            'id': video_id,
            'title': video.get('title'),
            'url': 'https://www.nicovideo.jp/watch/' + video_id,
            'description': video.get('shortDescription'),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(counters.get('view')),
            'comment_count': int_or_none(counters.get('comment')),
            'ie_key': NiconicoIE.ie_key(),
        }
        # Uploader fields come from the video's own 'owner' object.
        entry.update(self._parse_owner(video))
        yield entry
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
|
||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
||||
webpage, 'entries')
|
||||
entries = json.loads(entries_json)
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
||||
entry['item_data']['video_id']),
|
||||
} for entry in entries]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
|
||||
'id': list_id,
|
||||
'entries': entries,
|
||||
}
|
||||
mylist = self._call_api(list_id, 'list', {
|
||||
'pageSize': 1,
|
||||
})
|
||||
entries = InAdvancePagedList(
|
||||
functools.partial(self._fetch_page, list_id),
|
||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||
self._PAGE_SIZE)
|
||||
result = self.playlist_result(
|
||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||
result.update(self._parse_owner(mylist))
|
||||
return result
|
||||
|
@@ -5,10 +5,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'$include': '[HasClosedCaptions]',
|
||||
})
|
||||
|
||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
|
||||
if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
manifest_base_url = content_package_url + 'manifest.'
|
||||
@@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for image in content.get('Images', []):
|
||||
for image in (content.get('Images') or []):
|
||||
image_url = image.get('Url')
|
||||
if not image_url:
|
||||
continue
|
||||
@@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
continue
|
||||
container.append(e_name)
|
||||
|
||||
season = content.get('Season', {})
|
||||
season = content.get('Season') or {}
|
||||
|
||||
info = {
|
||||
'id': content_id,
|
||||
@@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'episode_number': int_or_none(content.get('Episode')),
|
||||
'season': season.get('Name'),
|
||||
'season_number': season.get('Number'),
|
||||
'season_number': int_or_none(season.get('Number')),
|
||||
'season_id': season.get('Id'),
|
||||
'series': content.get('Media', {}).get('Name'),
|
||||
'series': try_get(content, lambda x: x['Media']['Name']),
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'duration': float_or_none(content_package.get('Duration')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
if content_package.get('HasClosedCaptions'):
|
||||
|
@@ -6,16 +6,13 @@ import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
url_or_none,
|
||||
@@ -33,8 +30,7 @@ class NRKBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_nrk_formats(self, asset_url, video_id):
|
||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||
return self._extract_akamai_formats(
|
||||
re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id)
|
||||
return self._extract_akamai_formats(asset_url, video_id)
|
||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
@@ -64,7 +60,8 @@ class NRKBaseIE(InfoExtractor):
|
||||
return self._download_json(
|
||||
urljoin('http://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query)
|
||||
fatal=fatal, query=query,
|
||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
@@ -117,9 +114,39 @@ class NRKIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# podcast
|
||||
'url': 'nrk:l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:podcast/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# clip
|
||||
'url': 'nrk:150533',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:clip/150533',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# program
|
||||
'url': 'nrk:MDDP12000117',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:program/ENRK10100318',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# direkte
|
||||
'url': 'nrk:nrk1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'nrk:channel/nrk1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_from_playback(self, video_id):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url).split('/')[-1]
|
||||
|
||||
path_templ = 'playback/%s/' + video_id
|
||||
|
||||
def call_playback_api(item, query=None):
|
||||
@@ -127,6 +154,8 @@ class NRKIE(NRKBaseIE):
|
||||
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
||||
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
|
||||
|
||||
video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id
|
||||
|
||||
if manifest.get('playability') == 'nonPlayable':
|
||||
self._raise_error(manifest['nonPlayable'])
|
||||
|
||||
@@ -141,8 +170,15 @@ class NRKIE(NRKBaseIE):
|
||||
format_url = url_or_none(asset.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
|
||||
asset_format = (asset.get('format') or '').lower()
|
||||
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
||||
elif asset_format == 'mp3':
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': asset_format,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = call_playback_api('metadata')
|
||||
@@ -169,31 +205,94 @@ class NRKIE(NRKBaseIE):
|
||||
'height': int_or_none(image.get('pixelHeight')),
|
||||
})
|
||||
|
||||
return {
|
||||
subtitles = {}
|
||||
for sub in try_get(playable, lambda x: x['subtitles'], list) or []:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = url_or_none(sub.get('webVtt'))
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_key = str_or_none(sub.get('language')) or 'nb'
|
||||
sub_type = str_or_none(sub.get('type'))
|
||||
if sub_type:
|
||||
sub_key += '-%s' % sub_type
|
||||
subtitles.setdefault(sub_key, []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
legal_age = try_get(
    data, lambda x: x['legalAge']['body']['rating']['code'], compat_str)
# https://en.wikipedia.org/wiki/Norwegian_Media_Authority
# The rating code may be missing, in which case legal_age is None and
# must not be dereferenced; age_limit then stays None.
age_limit = None
if legal_age == 'A':
    age_limit = 0
elif legal_age and legal_age.isdigit():
    age_limit = int_or_none(legal_age)
|
||||
|
||||
is_series = try_get(data, lambda x: x['_links']['series']['name']) == 'series'
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_playback(video_id)
|
||||
if is_series:
|
||||
series = season_id = season_number = episode = episode_number = None
|
||||
programs = self._call_api(
|
||||
'programs/%s' % video_id, video_id, 'programs', fatal=False)
|
||||
if programs and isinstance(programs, dict):
|
||||
series = str_or_none(programs.get('seriesTitle'))
|
||||
season_id = str_or_none(programs.get('seasonId'))
|
||||
season_number = int_or_none(programs.get('seasonNumber'))
|
||||
episode = str_or_none(programs.get('episodeTitle'))
|
||||
episode_number = int_or_none(programs.get('episodeNumber'))
|
||||
if not series:
|
||||
series = title
|
||||
if alt_title:
|
||||
title += ' - %s' % alt_title
|
||||
if not season_number:
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'Sesong\s+(\d+)', description or '', 'season number',
|
||||
default=None))
|
||||
if not episode:
|
||||
episode = alt_title if is_series else None
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'^(\d+)\.', episode or '', 'episode number',
|
||||
default=None))
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'\((\d+)\s*:\s*\d+\)', description or '',
|
||||
'episode number', default=None))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': series,
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
class NRKTVIE(InfoExtractor):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
|
||||
'info_dict': {
|
||||
'id': 'MDDP12000117AA',
|
||||
'id': 'MDDP12000117',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alarm Trolltunga',
|
||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
@@ -204,24 +303,27 @@ class NRKTVIE(NRKBaseIE):
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'id': 'MUHH48000314',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'title': '20 spørsmål - 23. mai 2014',
|
||||
'alt_title': '23. mai 2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
'episode': '23. mai 2014',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
'id': 'MDFP15000514CA',
|
||||
'id': 'MDFP15000514',
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'title': 'Kunnskapskanalen - Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605.08,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
'episode': 'Grunnlovsjubiléet - Stor ståhei for ingenting',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -230,10 +332,11 @@ class NRKTVIE(NRKBaseIE):
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'id': 'MSPO40010515',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -243,24 +346,27 @@ class NRKTVIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'id': 'MSPO40010515',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'skip': 'Ikke tilgjengelig utenfor Norge',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
'id': 'KMTE50001317AA',
|
||||
'id': 'KMTE50001317',
|
||||
'ext': 'mp4',
|
||||
'title': 'Anno 13:30',
|
||||
'title': 'Anno - 13. episode',
|
||||
'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
|
||||
'duration': 2340,
|
||||
'series': 'Anno',
|
||||
'episode': '13:30',
|
||||
'episode': '13. episode',
|
||||
'season_number': 3,
|
||||
'episode_number': 13,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -268,13 +374,14 @@ class NRKTVIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
|
||||
'info_dict': {
|
||||
'id': 'MUHH46000317AA',
|
||||
'id': 'MUHH46000317',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nytt på Nytt 27.01.2017',
|
||||
'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
|
||||
'duration': 1796,
|
||||
'series': 'Nytt på nytt',
|
||||
'episode': '27.01.2017',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -291,180 +398,26 @@ class NRKTVIE(NRKBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_api_host = None
|
||||
|
||||
def _extract_from_mediaelement(self, video_id):
|
||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||
|
||||
for api_host in api_hosts:
|
||||
data = self._download_json(
|
||||
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||
video_id, 'Downloading mediaelement JSON',
|
||||
fatal=api_host == api_hosts[-1])
|
||||
if not data:
|
||||
continue
|
||||
self._api_host = api_host
|
||||
break
|
||||
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
urls = []
|
||||
entries = []
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
live = (data.get('mediaElementType') == 'Live'
|
||||
or data.get('isLive') is True or conviva.get('isLive'))
|
||||
|
||||
def make_title(t):
|
||||
return self._live_title(t) if live else t
|
||||
|
||||
media_assets = data.get('mediaAssets')
|
||||
if media_assets and isinstance(media_assets, list):
|
||||
def video_id_and_title(idx):
|
||||
return ((video_id, title) if len(media_assets) == 1
|
||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||
for num, asset in enumerate(media_assets, 1):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url or asset_url in urls:
|
||||
continue
|
||||
urls.append(asset_url)
|
||||
formats = self._extract_nrk_formats(asset_url, video_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
entry_id, entry_title = video_id_and_title(num)
|
||||
duration = parse_duration(asset.get('duration'))
|
||||
subtitles = {}
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({
|
||||
'url': compat_urllib_parse_unquote(subtitle_url)
|
||||
})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': make_title(entry_title),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'is_live': live,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
media_url = data.get('mediaUrl')
|
||||
if media_url and media_url not in urls:
|
||||
formats = self._extract_nrk_formats(media_url, video_id)
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': make_title(title),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'is_live': live,
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
self._raise_error(data)
|
||||
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
web_images = images.get('webImages')
|
||||
if isinstance(web_images, list):
|
||||
thumbnails = [{
|
||||
'url': image['imageUrl'],
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
|
||||
|
||||
for entry in entries:
|
||||
entry.update(common_info)
|
||||
for f in entry['formats']:
|
||||
f['vcodec'] = vcodec
|
||||
|
||||
points = data.get('shortIndexPoints')
|
||||
if isinstance(points, list):
|
||||
chapters = []
|
||||
for next_num, point in enumerate(points, start=1):
|
||||
if not isinstance(point, dict):
|
||||
continue
|
||||
start_time = parse_duration(point.get('startPoint'))
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = parse_duration(
|
||||
data.get('duration')
|
||||
if next_num == len(points)
|
||||
else points[next_num].get('startPoint'))
|
||||
if end_time is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': point.get('title'),
|
||||
})
|
||||
if chapters and len(entries) == 1:
|
||||
entries[0]['chapters'] = chapters
|
||||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_mediaelement(video_id)
|
||||
return self.url_result(
|
||||
'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/(?P<season_number>\d+)/episode/(?P<episode_number>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
|
||||
'info_dict': {
|
||||
'id': 'MUHH36005220BA',
|
||||
'id': 'MUHH36005220',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kro, krig og kjærlighet 2:6',
|
||||
'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
|
||||
'duration': 1563,
|
||||
'title': 'Hellums kro - 2. Kro, krig og kjærlighet',
|
||||
'description': 'md5:ad92ddffc04cea8ce14b415deef81787',
|
||||
'duration': 1563.92,
|
||||
'series': 'Hellums kro',
|
||||
'season_number': 1,
|
||||
'episode_number': 2,
|
||||
'episode': '2:6',
|
||||
'episode': '2. Kro, krig og kjærlighet',
|
||||
'age_limit': 6,
|
||||
},
|
||||
'params': {
|
||||
@@ -473,15 +426,16 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
|
||||
'info_dict': {
|
||||
'id': 'MSUI14000816AA',
|
||||
'id': 'MSUI14000816',
|
||||
'ext': 'mp4',
|
||||
'title': 'Backstage 8:30',
|
||||
'title': 'Backstage - 8. episode',
|
||||
'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
|
||||
'duration': 1320,
|
||||
'series': 'Backstage',
|
||||
'season_number': 1,
|
||||
'episode_number': 8,
|
||||
'episode': '8:30',
|
||||
'episode': '8. episode',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -490,7 +444,7 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id, season_number, episode_number = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
@@ -502,10 +456,12 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'_type': 'url',
|
||||
'id': nrk_id,
|
||||
'url': 'nrk:%s' % nrk_id,
|
||||
'ie_key': NRKIE.ie_key(),
|
||||
'season_number': int(season_number),
|
||||
'episode_number': int(episode_number),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -519,8 +475,6 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
return entries
|
||||
@@ -532,6 +486,10 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
if embedded.get(asset_key):
|
||||
return asset_key
|
||||
|
||||
@staticmethod
|
||||
def _catalog_name(serie_kind):
|
||||
return 'podcast' if serie_kind in ('podcast', 'podkast') else 'series'
|
||||
|
||||
def _entries(self, data, display_id):
|
||||
for page_num in itertools.count(1):
|
||||
embedded = data.get('_embedded') or data
|
||||
@@ -565,7 +523,16 @@ class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
|
||||
|
||||
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<domain>tv|radio)\.nrk\.no/
|
||||
(?P<serie_kind>serie|pod[ck]ast)/
|
||||
(?P<serie>[^/]+)/
|
||||
(?:
|
||||
(?:sesong/)?(?P<id>\d+)|
|
||||
sesong/(?P<id_2>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
||||
'info_dict': {
|
||||
@@ -601,19 +568,34 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
# 180 entries, single page
|
||||
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/diagnose-kverulant',
|
||||
'info_dict': {
|
||||
'id': 'hele_historien/diagnose-kverulant',
|
||||
'title': 'Diagnose kverulant',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/loerdagsraadet/sesong/202101',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
|
||||
return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url)
|
||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, serie, season_id = re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
domain = mobj.group('domain')
|
||||
serie_kind = mobj.group('serie_kind')
|
||||
serie = mobj.group('serie')
|
||||
season_id = mobj.group('id') or mobj.group('id_2')
|
||||
display_id = '%s/%s' % (serie, season_id)
|
||||
|
||||
data = self._call_api(
|
||||
'%s/catalog/series/%s/seasons/%s' % (domain, serie, season_id),
|
||||
'%s/catalog/%s/%s/seasons/%s'
|
||||
% (domain, self._catalog_name(serie_kind), serie, season_id),
|
||||
display_id, 'season', query={'pageSize': 50})
|
||||
|
||||
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
|
||||
@@ -623,7 +605,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
|
||||
|
||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/serie/(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/(?P<serie_kind>serie|pod[ck]ast)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# new layout, instalments
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
@@ -683,23 +665,33 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
}, {
|
||||
'url': 'https://nrksuper.no/serie/labyrint',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers',
|
||||
'info_dict': {
|
||||
'id': 'ulrikkes_univers',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/nrkno-poddkast-26588-134079-05042018030000',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
False if any(ie.suitable(url)
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
|
||||
for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE))
|
||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, series_id = re.match(self._VALID_URL, url).groups()
|
||||
site, serie_kind, series_id = re.match(self._VALID_URL, url).groups()
|
||||
is_radio = site == 'radio.nrk'
|
||||
domain = 'radio' if is_radio else 'tv'
|
||||
|
||||
size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
|
||||
series = self._call_api(
|
||||
'%s/catalog/series/%s' % (domain, series_id),
|
||||
'%s/catalog/%s/%s'
|
||||
% (domain, self._catalog_name(serie_kind), series_id),
|
||||
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
||||
titles = try_get(series, [
|
||||
lambda x: x['titles'],
|
||||
@@ -714,12 +706,14 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
embedded_seasons = embedded.get('seasons') or []
|
||||
if len(linked_seasons) > len(embedded_seasons):
|
||||
for season in linked_seasons:
|
||||
season_name = season.get('name')
|
||||
if season_name and isinstance(season_name, compat_str):
|
||||
season_url = urljoin(url, season.get('href'))
|
||||
if not season_url:
|
||||
season_name = season.get('name')
|
||||
if season_name and isinstance(season_name, compat_str):
|
||||
season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name)
|
||||
if season_url:
|
||||
entries.append(self.url_result(
|
||||
'https://%s.nrk.no/serie/%s/sesong/%s'
|
||||
% (domain, series_id, season_name),
|
||||
ie=NRKTVSeasonIE.ie_key(),
|
||||
season_url, ie=NRKTVSeasonIE.ie_key(),
|
||||
video_title=season.get('title')))
|
||||
else:
|
||||
for season in embedded_seasons:
|
||||
@@ -744,6 +738,38 @@ class NRKTVDirekteIE(NRKTVIE):
|
||||
}]
|
||||
|
||||
|
||||
class NRKRadioPodkastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://radio\.nrk\.no/pod[ck]ast/(?:[^/]+/)+(?P<id>l_[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
'title': '20 spørsmål 23.05.2014',
|
||||
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
|
||||
'duration': 1741,
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podcast/ulrikkes_univers/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/ulrikkes_univers/sesong/1/l_96f4f1b0-de54-4e6a-b4f1-b0de54fe6af8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/podkast/hele_historien/sesong/bortfoert-i-bergen/l_774d1a2c-7aa7-4965-8d1a-2c7aa7d9652c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class NRKPlaylistBaseIE(InfoExtractor):
|
||||
def _extract_description(self, webpage):
|
||||
pass
|
||||
|
@@ -6,16 +6,33 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
(?:
|
||||
player\.
|
||||
(?:
|
||||
olympusattelecom|
|
||||
vibebyvista
|
||||
)|
|
||||
(?:api|player)\.multicastmedia|
|
||||
(?:api-ovp|player)\.piksel
|
||||
)\.com|
|
||||
(?:
|
||||
mz-edge\.stream\.co|
|
||||
movie-s\.nhk\.or
|
||||
)\.jp|
|
||||
vidego\.baltimorecity\.gov
|
||||
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/ums2867l',
|
||||
@@ -56,46 +73,41 @@ class PikselIE(InfoExtractor):
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _call_api(self, app_token, resource, display_id, query, fatal=True):
|
||||
response = (self._download_json(
|
||||
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
|
||||
display_id, query=query, fatal=fatal) or {}).get('response')
|
||||
failure = try_get(response, lambda x: x['failure']['reason'])
|
||||
if failure:
|
||||
if fatal:
|
||||
raise ExtractorError(failure, expected=True)
|
||||
self.report_warning(failure)
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
ref_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
|
||||
webpage, 'program uuid', default=display_id)
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
'v': video_id
|
||||
})['response']
|
||||
failure = response.get('failure')
|
||||
if failure:
|
||||
raise ExtractorError(response['failure']['reason'], expected=True)
|
||||
video_data = response['WsProgramResponse']['program']['asset']
|
||||
query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
|
||||
program = self._call_api(
|
||||
app_token, 'program', display_id, query)['WsProgramResponse']['program']
|
||||
video_id = program['uuid']
|
||||
video_data = program['asset']
|
||||
title = video_data['title']
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
for asset_file in video_data.get('assetFiles', []):
|
||||
def process_asset_file(asset_file):
|
||||
if not asset_file:
|
||||
return
|
||||
# TODO: extract rtmp formats
|
||||
http_url = asset_file.get('http_url')
|
||||
if not http_url:
|
||||
continue
|
||||
return
|
||||
tbr = None
|
||||
vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
|
||||
abr = int_or_none(asset_file.get('audioBitrate'), 1024)
|
||||
@@ -118,6 +130,43 @@ class PikselIE(InfoExtractor):
|
||||
'filesize': int_or_none(asset_file.get('filesize')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
def process_asset_files(asset_files):
|
||||
for asset_file in (asset_files or []):
|
||||
process_asset_file(asset_file)
|
||||
|
||||
process_asset_files(video_data.get('assetFiles'))
|
||||
process_asset_file(video_data.get('referenceFile'))
|
||||
if not formats:
|
||||
asset_id = video_data.get('assetid') or program.get('assetid')
|
||||
if asset_id:
|
||||
process_asset_files(try_get(self._call_api(
|
||||
app_token, 'asset_file', display_id, {
|
||||
'assetid': asset_id,
|
||||
}, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
|
||||
if smil_url:
|
||||
transform_source = None
|
||||
if ref_id == 'nhkworld':
|
||||
# TODO: figure out if this is something to be fixed in urljoin,
|
||||
# _parse_smil_formats or keep it here
|
||||
transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
|
||||
formats.extend(self._extract_smil_formats(
|
||||
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
|
||||
transform_source=transform_source, fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@@ -288,14 +288,24 @@ class PornHubIE(PornHubBaseIE):
|
||||
video_urls.append((v_url, None))
|
||||
video_urls_set.add(v_url)
|
||||
|
||||
def parse_quality_items(quality_items):
|
||||
q_items = self._parse_json(quality_items, video_id, fatal=False)
|
||||
if not isinstance(q_items, list):
|
||||
return
|
||||
for item in q_items:
|
||||
if isinstance(item, dict):
|
||||
add_video_url(item.get('url'))
|
||||
|
||||
if not video_urls:
|
||||
FORMAT_PREFIXES = ('media', 'quality')
|
||||
FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
|
||||
js_vars = extract_js_vars(
|
||||
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
|
||||
default=None)
|
||||
if js_vars:
|
||||
for key, format_url in js_vars.items():
|
||||
if any(key.startswith(p) for p in FORMAT_PREFIXES):
|
||||
if key.startswith(FORMAT_PREFIXES[-1]):
|
||||
parse_quality_items(format_url)
|
||||
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
|
||||
add_video_url(format_url)
|
||||
if not video_urls and re.search(
|
||||
r'<[^>]+\bid=["\']lockedPlayer', webpage):
|
||||
@@ -351,12 +361,16 @@ class PornHubIE(PornHubBaseIE):
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', default=None)
|
||||
|
||||
def extract_vote_count(kind, name):
|
||||
return self._extract_count(
|
||||
(r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
|
||||
r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
|
||||
webpage, name)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
|
||||
like_count = extract_vote_count('Up', 'like')
|
||||
dislike_count = extract_vote_count('Down', 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -56,7 +57,8 @@ class RedditRIE(InfoExtractor):
|
||||
'id': 'zv89llsvexdz',
|
||||
'ext': 'mp4',
|
||||
'title': 'That small heart attack.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:4',
|
||||
'timestamp': 1501941939,
|
||||
'upload_date': '20170805',
|
||||
'uploader': 'Antw87',
|
||||
@@ -118,11 +120,34 @@ class RedditRIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
thumbnails = []
|
||||
|
||||
def add_thumbnail(src):
|
||||
if not isinstance(src, dict):
|
||||
return
|
||||
thumbnail_url = url_or_none(src.get('url'))
|
||||
if not thumbnail_url:
|
||||
return
|
||||
thumbnails.append({
|
||||
'url': unescapeHTML(thumbnail_url),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
})
|
||||
|
||||
for image in try_get(data, lambda x: x['preview']['images']) or []:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
add_thumbnail(image.get('source'))
|
||||
resolutions = image.get('resolutions')
|
||||
if isinstance(resolutions, list):
|
||||
for resolution in resolutions:
|
||||
add_thumbnail(resolution)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'title': data.get('title'),
|
||||
'thumbnail': url_or_none(data.get('thumbnail')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': float_or_none(data.get('created_utc')),
|
||||
'uploader': data.get('author'),
|
||||
'duration': int_or_none(try_get(
|
||||
|
@@ -4,8 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -41,16 +45,22 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
def _real_extract(self, url):
|
||||
path, episode_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
try:
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), episode_id)[0]['error_code'], expected=True)
|
||||
raise
|
||||
|
||||
for source in media.get('sources', {}):
|
||||
src = source.get('src')
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
@@ -11,36 +13,59 @@ from ..utils import (
|
||||
|
||||
|
||||
class SkyBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
|
||||
webpage, 'video data'))
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
_SDC_EL_REGEX = r'(?s)(<div[^>]+data-(?:component-name|fn)="sdc-(?:articl|sit)e-video"[^>]*>)'
|
||||
|
||||
video_url = 'ooyala:%s' % video_data['data-video-id']
|
||||
if video_data.get('data-token-required') == 'true':
|
||||
token_fetch_options = self._parse_json(video_data.get(
|
||||
'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
|
||||
token_fetch_url = token_fetch_options.get('url')
|
||||
if token_fetch_url:
|
||||
embed_token = self._download_webpage(urljoin(
|
||||
url, token_fetch_url), video_id, fatal=False)
|
||||
if embed_token:
|
||||
video_url = smuggle_url(
|
||||
video_url, {'embed_token': embed_token.strip('"')})
|
||||
def _process_ooyala_element(self, webpage, sdc_el, url):
|
||||
sdc = extract_attributes(sdc_el)
|
||||
provider = sdc.get('data-provider')
|
||||
if provider == 'ooyala':
|
||||
video_id = sdc['data-sdc-video-id']
|
||||
video_url = 'ooyala:%s' % video_id
|
||||
ie_key = 'Ooyala'
|
||||
ooyala_el = self._search_regex(
|
||||
r'(<div[^>]+class="[^"]*\bsdc-article-video__media-ooyala\b[^"]*"[^>]+data-video-id="%s"[^>]*>)' % video_id,
|
||||
webpage, 'video data', fatal=False)
|
||||
if ooyala_el:
|
||||
ooyala_attrs = extract_attributes(ooyala_el) or {}
|
||||
if ooyala_attrs.get('data-token-required') == 'true':
|
||||
token_fetch_url = (self._parse_json(ooyala_attrs.get(
|
||||
'data-token-fetch-options', '{}'),
|
||||
video_id, fatal=False) or {}).get('url')
|
||||
if token_fetch_url:
|
||||
embed_token = self._download_json(urljoin(
|
||||
url, token_fetch_url), video_id, fatal=False)
|
||||
if embed_token:
|
||||
video_url = smuggle_url(
|
||||
video_url, {'embed_token': embed_token})
|
||||
elif provider == 'brightcove':
|
||||
video_id = sdc['data-video-id']
|
||||
account_id = sdc.get('data-account-id') or '6058004172001'
|
||||
player_id = sdc.get('data-player-id') or 'RC9PQUaJ6'
|
||||
video_url = self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id)
|
||||
ie_key = 'BrightcoveNew'
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ie_key': ie_key,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._process_ooyala_element(webpage, self._search_regex(
|
||||
self._SDC_EL_REGEX, webpage, 'sdc element'), url)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': strip_or_none(self._og_search_description(webpage)),
|
||||
'ie_key': 'Ooyala',
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class SkySportsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:sports'
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/([^/]+/)*(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
|
||||
@@ -62,15 +87,45 @@ class SkySportsIE(SkyBaseIE):
|
||||
|
||||
|
||||
class SkyNewsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:news'
|
||||
_VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
|
||||
'md5': 'd6327e581473cea9976a3236ded370cd',
|
||||
'md5': '411e8893fd216c75eaf7e4c65d364115',
|
||||
'info_dict': {
|
||||
'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
|
||||
'id': 'ref:1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
|
||||
'ext': 'mp4',
|
||||
'title': 'Russian plane inspected after deadly fire',
|
||||
'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
|
||||
'uploader_id': '6058004172001',
|
||||
'timestamp': 1567112345,
|
||||
'upload_date': '20190829',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}
|
||||
|
||||
|
||||
class SkySportsNewsIE(SkyBaseIE):
|
||||
IE_NAME = 'sky:sports:news'
|
||||
_VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.skysports.com/golf/news/12176/10871916/dustin-johnson-ready-to-conquer-players-championship-at-tpc-sawgrass',
|
||||
'info_dict': {
|
||||
'id': '10871916',
|
||||
'title': 'Dustin Johnson ready to conquer Players Championship at TPC Sawgrass',
|
||||
'description': 'Dustin Johnson is confident he can continue his dominant form in 2017 by adding the Players Championship to his list of victories.',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
|
||||
entries = []
|
||||
for sdc_el in re.findall(self._SDC_EL_REGEX, webpage):
|
||||
entries.append(self._process_ooyala_element(webpage, sdc_el, url))
|
||||
|
||||
return self.playlist_result(
|
||||
entries, article_id, self._og_search_title(webpage),
|
||||
self._html_search_meta(['og:description', 'description'], webpage))
|
||||
|
@@ -1,40 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SonyLIVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
||||
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
|
||||
'info_dict': {
|
||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
||||
'id': 'ref:5024612095001',
|
||||
'title': 'Bachelors Delight - Achaari Cheese Toast',
|
||||
'id': '1000022678',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170923',
|
||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
||||
'uploader_id': '5182475815001',
|
||||
'timestamp': 1506200547,
|
||||
'upload_date': '20200411',
|
||||
'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
|
||||
'timestamp': 1586632091,
|
||||
'duration': 185,
|
||||
'season_number': 1,
|
||||
'episode': 'Achaari Cheese Toast',
|
||||
'episode_number': 1,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
||||
'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_TOKEN = None
|
||||
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
|
||||
def _call_api(self, version, path, video_id):
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['security_token'] = self._TOKEN
|
||||
try:
|
||||
return self._download_json(
|
||||
'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
|
||||
video_id, headers=headers)['resultObj']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
if message == 'Geoblocked Country':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(message)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
||||
'geo_countries': ['IN'],
|
||||
'referrer': url,
|
||||
}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
video_id = self._match_id(url)
|
||||
content = self._call_api(
|
||||
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
|
||||
if content.get('isEncrypted'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
dash_url = content['videoURL']
|
||||
headers = {
|
||||
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
|
||||
}
|
||||
formats = self._extract_mpd_formats(
|
||||
dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
|
||||
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._call_api(
|
||||
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
|
||||
title = metadata['title']
|
||||
episode = metadata.get('episodeTitle')
|
||||
if episode and title != episode:
|
||||
title += ' - ' + episode
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('posterURL'),
|
||||
'description': metadata.get('longDescription') or metadata.get('shortDescription'),
|
||||
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'season_number': int_or_none(metadata.get('season')),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'release_year': int_or_none(metadata.get('year')),
|
||||
}
|
||||
|
@@ -7,17 +7,24 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?spankbang\.com/
|
||||
(?:
|
||||
(?P<id>[\da-z]+)/(?:video|play|embed)\b|
|
||||
[\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
@@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2y3td/embed/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('id_2')
|
||||
webpage = self._download_webpage(
|
||||
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||
video_id, headers={'Cookie': 'country=US'})
|
||||
@@ -155,30 +166,33 @@ class SpankBangIE(InfoExtractor):
|
||||
|
||||
|
||||
class SpankBangPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||
'info_dict': {
|
||||
'id': 'ug0k',
|
||||
'title': 'Big Ass Titties',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_mincount': 40,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||
|
||||
entries = [self.url_result(
|
||||
'https://spankbang.com/%s/video' % video_id,
|
||||
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||
urljoin(url, mobj.group('path')),
|
||||
ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
|
||||
% re.escape(display_id), webpage)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||
r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
|
||||
fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
@@ -3,50 +3,62 @@ from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'md5': '74bf14128578d1e040c3ebc82088f45f',
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
|
||||
'info_dict': {
|
||||
'id': '9dexnwtmh8_X',
|
||||
'id': 'bm0foJFaTKqb',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Cowboy Adventure',
|
||||
'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
|
||||
'timestamp': 1437758640,
|
||||
'upload_date': '20150724',
|
||||
'uploader': 'NBCU-SPROUT-NEW',
|
||||
}
|
||||
}
|
||||
'title': 'Robot Bike Race',
|
||||
'description': 'md5:436b1d97117cc437f54c383f4debc66d',
|
||||
'timestamp': 1606148940,
|
||||
'upload_date': '20201123',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.universalkids.com/watch/robot-bike-race',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_component = self._search_regex(
|
||||
r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
|
||||
webpage, 'video component', default=None)
|
||||
if video_component:
|
||||
options = self._parse_json(extract_attributes(
|
||||
video_component)['data-options'], video_id)
|
||||
theplatform_url = options['video']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if options.get('protected'):
|
||||
query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
theplatform_url, query), {'force_smil_url': True})
|
||||
else:
|
||||
iframe = self._search_regex(
|
||||
r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
|
||||
webpage, 'iframe')
|
||||
theplatform_url = extract_attributes(iframe)['src']
|
||||
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
display_id = self._match_id(url)
|
||||
mpx_metadata = self._download_json(
|
||||
# http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
|
||||
'https://www.universalkids.com/_api/videos/' + display_id,
|
||||
display_id)['mpxMetadata']
|
||||
media_pid = mpx_metadata['mediaPid']
|
||||
theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if mpx_metadata.get('entitlement') == 'auth':
|
||||
query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(
|
||||
update_url_query(theplatform_url, query), {
|
||||
'force_smil_url': True,
|
||||
'geo_countries': self._GEO_COUNTRIES,
|
||||
})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_pid,
|
||||
'url': theplatform_url,
|
||||
'series': mpx_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
|
||||
'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@@ -4,25 +4,28 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
unescapeHTML,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class StitcherIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/(?:[^/]+/)+e(?:pisode)?/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
|
||||
'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
|
||||
'md5': 'e9635098e0da10b21a0e2b85585530f6',
|
||||
'info_dict': {
|
||||
'id': '40789481',
|
||||
'ext': 'mp3',
|
||||
'title': 'Machine Learning Mastery and Cancer Clusters',
|
||||
'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3',
|
||||
'description': 'md5:547adb4081864be114ae3831b4c2b42f',
|
||||
'duration': 1604,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20180126',
|
||||
'timestamp': 1516989316,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
|
||||
@@ -38,6 +41,7 @@ class StitcherIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Page Not Found',
|
||||
}, {
|
||||
# escaped title
|
||||
'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
|
||||
@@ -45,37 +49,39 @@ class StitcherIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
audio_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or audio_id
|
||||
display_id, audio_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
resp = self._download_json(
|
||||
'https://api.prod.stitcher.com/episode/' + audio_id,
|
||||
display_id or audio_id)
|
||||
episode = try_get(resp, lambda x: x['data']['episodes'][0], dict)
|
||||
if not episode:
|
||||
raise ExtractorError(resp['errors'][0]['message'], expected=True)
|
||||
|
||||
episode = self._parse_json(
|
||||
js_to_json(self._search_regex(
|
||||
r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
|
||||
display_id)['config']['episode']
|
||||
title = episode['title'].strip()
|
||||
audio_url = episode['audio_url']
|
||||
|
||||
title = unescapeHTML(episode['title'])
|
||||
formats = [{
|
||||
'url': episode[episode_key],
|
||||
'ext': determine_ext(episode[episode_key]) or 'mp3',
|
||||
'vcodec': 'none',
|
||||
} for episode_key in ('episodeURL',) if episode.get(episode_key)]
|
||||
description = self._search_regex(
|
||||
r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False)
|
||||
duration = int_or_none(episode.get('duration'))
|
||||
thumbnail = episode.get('episodeImage')
|
||||
thumbnail = None
|
||||
show_id = episode.get('show_id')
|
||||
if show_id and episode.get('classic_id') != -1:
|
||||
thumbnail = 'https://stitcher-classic.imgix.net/feedimages/%s.jpg' % show_id
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'description': clean_html(episode.get('html_description') or episode.get('description')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'url': audio_url,
|
||||
'vcodec': 'none',
|
||||
'timestamp': int_or_none(episode.get('date_created')),
|
||||
'season_number': int_or_none(episode.get('season')),
|
||||
'season_id': str_or_none(episode.get('season_id')),
|
||||
}
|
||||
|
@@ -2,25 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class StreetVoiceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://streetvoice.com/skippylu/songs/94440/',
|
||||
'md5': '15974627fc01a29e492c98593c2fd472',
|
||||
'url': 'https://streetvoice.com/skippylu/songs/123688/',
|
||||
'md5': '0eb535970629a5195685355f3ed60bfd',
|
||||
'info_dict': {
|
||||
'id': '94440',
|
||||
'id': '123688',
|
||||
'ext': 'mp3',
|
||||
'title': '輸',
|
||||
'description': 'Crispy脆樂團 - 輸',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 260,
|
||||
'upload_date': '20091018',
|
||||
'title': '流浪',
|
||||
'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 270,
|
||||
'upload_date': '20100923',
|
||||
'uploader': 'Crispy脆樂團',
|
||||
'uploader_id': '627810',
|
||||
'uploader_url': 're:^https?://streetvoice.com/skippylu/',
|
||||
'timestamp': 1285261661,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'track': '流浪',
|
||||
'track_id': '123688',
|
||||
'album': '2010',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
||||
@@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
song = self._download_json(
|
||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
||||
|
||||
base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
|
||||
song = self._download_json(base_url, song_id, query={
|
||||
'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
|
||||
})
|
||||
title = song['name']
|
||||
author = song['user']['nickname']
|
||||
|
||||
formats = []
|
||||
for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
|
||||
f_url = (self._download_json(
|
||||
base_url + suffix + '/', song_id,
|
||||
'Downloading %s format URL' % format_id,
|
||||
data=b'', fatal=False) or {}).get('file')
|
||||
if not f_url:
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp3',
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if format_id == 'hls':
|
||||
f['protocol'] = 'm3u8_native'
|
||||
abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
|
||||
if abr:
|
||||
abr = int(abr)
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
user = song.get('user') or {}
|
||||
username = user.get('username')
|
||||
get_count = lambda x: int_or_none(song.get(x + '_count'))
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song['file'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': '%s - %s' % (author, title),
|
||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
||||
'duration': song.get('length'),
|
||||
'upload_date': unified_strdate(song.get('created_at')),
|
||||
'uploader': author,
|
||||
'uploader_id': compat_str(song['user']['id']),
|
||||
'description': strip_or_none(song.get('synopsis')),
|
||||
'thumbnail': song.get('image'),
|
||||
'duration': int_or_none(song.get('length')),
|
||||
'timestamp': parse_iso8601(song.get('created_at')),
|
||||
'uploader': try_get(user, lambda x: x['profile']['nickname']),
|
||||
'uploader_id': str_or_none(user.get('id')),
|
||||
'uploader_url': urljoin(url, '/%s/' % username) if username else None,
|
||||
'view_count': get_count('plays'),
|
||||
'like_count': get_count('likes'),
|
||||
'comment_count': get_count('comments'),
|
||||
'repost_count': get_count('share'),
|
||||
'track': title,
|
||||
'track_id': song_id,
|
||||
'album': try_get(song, lambda x: x['album']['name']),
|
||||
}
|
||||
|
@@ -1,43 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TastyTradeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
'title': 'A History of Teaming',
|
||||
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': OoyalaIE.ie_key(),
|
||||
'url': 'ooyala:%s' % ooyala_code,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
@@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE):
|
||||
@staticmethod
|
||||
def _is_teachable(webpage):
|
||||
return 'teachableTracker.linker:autoLink' in webpage and re.search(
|
||||
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
|
||||
r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
|
||||
webpage)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -5,14 +5,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'md5': '7ee56d665cfd241c0e6d80fd175068b0',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
@@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor):
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
|
||||
'info_dict': {
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||
'md5': 'eddb50291df704ce23c74821b995bcac',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
@@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor):
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
if content.get('dataCmsId') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
|
||||
config_url = urljoin(url, content['dataConfig'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
content['dataConfig'], video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
|
||||
def mmc_url(mmc_type):
|
||||
return re.sub(
|
||||
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
|
||||
config['services']['mmc'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for mmc_type in ('flash', 'html5'):
|
||||
mmc = self._download_json(
|
||||
mmc_url(mmc_type), video_id,
|
||||
'Downloading %s mmc JSON' % mmc_type, fatal=False)
|
||||
if not mmc:
|
||||
continue
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
}, fatal=False) or {}
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
services = config['services']
|
||||
caronte = self._download_json(services['caronte'], video_id)
|
||||
stream = caronte['dls'][0]['stream']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
'Origin': re.match(r'https?://[^/]+', url).group(0),
|
||||
})
|
||||
cdn = self._download_json(
|
||||
caronte['cerbero'], video_id, data=json.dumps({
|
||||
'bbx': caronte['bbx'],
|
||||
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
|
||||
}).encode(), headers=headers)['tokens']['1']['cdn']
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
'duration': int_or_none(content.get('dataDuration')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
# smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,14 +25,16 @@ class TenPlayIE(InfoExtractor):
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# 'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
_GEO_BYPASS = False
|
||||
_FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
@@ -40,19 +43,28 @@ class TenPlayIE(InfoExtractor):
|
||||
video = data.get('video') or {}
|
||||
metadata = data.get('metaData') or {}
|
||||
brightcove_id = video.get('videoId') or metadata['showContentVideoId']
|
||||
brightcove_url = smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['AU']})
|
||||
# brightcove_url = smuggle_url(
|
||||
# self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
# {'geo_countries': ['AU']})
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': brightcove_url,
|
||||
'id': content_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
|
||||
# '_type': 'url_transparent',
|
||||
# 'url': brightcove_url,
|
||||
'formats': formats,
|
||||
'id': brightcove_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
|
||||
'description': video.get('description'),
|
||||
'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
|
||||
'series': metadata.get('showName'),
|
||||
'season': metadata.get('showContentSeason'),
|
||||
'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'thumbnail': video.get('poster'),
|
||||
'uploader_id': '2199827728001',
|
||||
# 'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
provider_id = mobj.group('provider_id')
|
||||
|
@@ -1,18 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TheWeatherChannelIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
|
||||
'md5': 'ab924ac9574e79689c24c6b95e957def',
|
||||
'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
|
||||
'info_dict': {
|
||||
'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
|
||||
'ext': 'mp4',
|
||||
@@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
|
||||
'uploader': 'TWC - Digital (No Distro)',
|
||||
'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
|
||||
'upload_date': '20160720',
|
||||
'timestamp': 1469018835,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
video_id = drupal_settings['twc']['contexts']['node']['uuid']
|
||||
video_data = self._download_json(
|
||||
'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
|
||||
asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not locale:
|
||||
locale = 'en-US'
|
||||
video_data = list(self._download_json(
|
||||
'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
|
||||
'name': 'getCMSAssetsUrlConfig',
|
||||
'params': {
|
||||
'language': locale.replace('-', '_'),
|
||||
'query': {
|
||||
'assetName': {
|
||||
'$in': asset_name,
|
||||
},
|
||||
},
|
||||
}
|
||||
}]).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
|
||||
video_id = video_data['id']
|
||||
seo_meta = video_data.get('seometa', {})
|
||||
title = video_data.get('title') or seo_meta['title']
|
||||
|
||||
@@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
cc_url = video_data.get('cc_url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'uploader': video_data.get('providername'),
|
||||
'uploader_id': video_data.get('providerid'),
|
||||
'timestamp': parse_iso8601(video_data.get('publishdate')),
|
||||
'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor):
|
||||
|
||||
class MeWatchIE(InfoExtractor):
|
||||
IE_NAME = 'mewatch'
|
||||
_VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
|
||||
'info_dict': {
|
||||
@@ -214,6 +214,15 @@ class MeWatchIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
@@ -49,8 +50,13 @@ class TurnerBaseIE(AdobePassIE):
|
||||
self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
|
||||
return video_url + '?hdnea=' + token
|
||||
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
|
||||
video_data = self._download_xml(data_src, video_id)
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
|
||||
video_data = self._download_xml(
|
||||
data_src, video_id,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=fatal)
|
||||
if not video_data:
|
||||
return {}
|
||||
video_id = video_data.attrib['id']
|
||||
title = xpath_text(video_data, 'headline', fatal=True)
|
||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||
@@ -63,12 +69,14 @@ class TurnerBaseIE(AdobePassIE):
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
rex = re.compile(
|
||||
r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
|
||||
# Possible formats locations: files/file, files/groupFiles/files
|
||||
# and maybe others
|
||||
for video_file in video_data.findall('.//file'):
|
||||
video_url = video_file.text.strip()
|
||||
video_url = url_or_none(video_file.text.strip())
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
@@ -108,9 +116,28 @@ class TurnerBaseIE(AdobePassIE):
|
||||
continue
|
||||
urls.append(video_url)
|
||||
format_id = video_file.get('bitrate')
|
||||
if ext == 'smil':
|
||||
if ext in ('scc', 'srt', 'vtt'):
|
||||
subtitles.setdefault('en', []).append({
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'png':
|
||||
thumbnails.append({
|
||||
'id': format_id,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
video_url, video_id, {
|
||||
'hds': path_data.get('f4m', {}).get('host'),
|
||||
# nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
|
||||
# ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
|
||||
# ssl.cdn.turner.com
|
||||
'http': 'pmd.cdn.turner.com',
|
||||
}))
|
||||
elif ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
@@ -129,7 +156,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
}
|
||||
mobj = rex.search(format_id + video_url)
|
||||
mobj = rex.search(video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group('width')),
|
||||
@@ -152,7 +179,6 @@ class TurnerBaseIE(AdobePassIE):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
@@ -168,12 +194,12 @@ class TurnerBaseIE(AdobePassIE):
|
||||
}.get(source.get('format'))
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'id': image.get('cut'),
|
||||
thumbnails.extend({
|
||||
'id': image.get('cut') or image.get('name'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.findall('images/image')]
|
||||
} for image in video_data.findall('images/image'))
|
||||
|
||||
is_live = xpath_text(video_data, 'isLive') == 'true'
|
||||
|
||||
|
@@ -5,10 +5,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '2117008346001',
|
||||
'ext': 'mp4',
|
||||
@@ -23,7 +22,11 @@ class UKTVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest']
|
||||
}
|
||||
}, {
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vhx:embed'
|
||||
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
|
||||
|
||||
@staticmethod
def _extract_url(webpage):
    """Return the first embed.vhx.tv iframe URL found in webpage, or None.

    The URL is taken from the iframe's double-quoted src attribute and
    HTML-unescaped before being returned.
    """
    iframe = re.search(
        r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
    if not iframe:
        return None
    return unescapeHTML(iframe.group(1))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
'ott data'), video_id, js_to_json)['config_url']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
info['id'] = video_id
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
return info
|
||||
|
@@ -12,7 +12,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class VVVVIDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
|
||||
_VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
# video_type == 'video/vvvvid'
|
||||
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
|
||||
@@ -21,6 +22,15 @@ class VVVVIDIE(InfoExtractor):
|
||||
'id': '489048',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ping Pong',
|
||||
'duration': 239,
|
||||
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
|
||||
'season_id': '437',
|
||||
'episode': 'Ping Pong',
|
||||
'episode_number': 1,
|
||||
'episode_id': '3334',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -37,6 +47,9 @@ class VVVVIDIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
||||
'only_matching': True
|
||||
}]
|
||||
_conn_id = None
|
||||
|
||||
@@ -45,20 +58,35 @@ class VVVVIDIE(InfoExtractor):
|
||||
'https://www.vvvvid.it/user/login',
|
||||
None, headers=self.geo_verification_headers())['data']['conn_id']
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
def _download_info(self, show_id, path, video_id, fatal=True):
|
||||
response = self._download_json(
|
||||
'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
|
||||
'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
|
||||
video_id, headers=self.geo_verification_headers(), query={
|
||||
'conn_id': self._conn_id,
|
||||
})
|
||||
if response['result'] == 'error':
|
||||
}, fatal=fatal)
|
||||
if not (response or fatal):
|
||||
return
|
||||
if response.get('result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['message']), expected=True)
|
||||
return response['data']
|
||||
|
||||
def _extract_common_video_info(self, video_data):
    """Return the info fields shared by single-video and playlist entries.

    Reads 'thumbnail' and 'id' from video_data; the id is passed through
    str_or_none before being exposed as 'episode_id'.
    """
    common_info = {}
    common_info['thumbnail'] = video_data.get('thumbnail')
    common_info['episode_id'] = str_or_none(video_data.get('id'))
    return common_info
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
response = self._download_info(
|
||||
show_id, 'season/%s' % season_id, video_id)
|
||||
|
||||
vid = int(video_id)
|
||||
video_data = list(filter(
|
||||
lambda episode: episode.get('video_id') == vid, response['data']))[0]
|
||||
lambda episode: episode.get('video_id') == vid, response))[0]
|
||||
title = video_data['title']
|
||||
formats = []
|
||||
|
||||
# vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
|
||||
@@ -115,6 +143,17 @@ class VVVVIDIE(InfoExtractor):
|
||||
|
||||
return d
|
||||
|
||||
info = {}
|
||||
|
||||
def metadata_from_url(r_url):
|
||||
if not info and r_url:
|
||||
mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
|
||||
if mobj:
|
||||
info['episode_number'] = int(mobj.group(2))
|
||||
season_number = mobj.group(1)
|
||||
if season_number:
|
||||
info['season_number'] = int(season_number)
|
||||
|
||||
for quality in ('_sd', ''):
|
||||
embed_code = video_data.get('embed_info' + quality)
|
||||
if not embed_code:
|
||||
@@ -122,7 +161,6 @@ class VVVVIDIE(InfoExtractor):
|
||||
embed_code = ds(embed_code)
|
||||
video_type = video_data.get('video_type')
|
||||
if video_type in ('video/rcs', 'video/kenc'):
|
||||
embed_code = re.sub(r'https?://([^/]+)/z/', r'https://\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
|
||||
if video_type == 'video/kenc':
|
||||
kenc = self._download_json(
|
||||
'https://www.vvvvid.it/kenc', video_id, query={
|
||||
@@ -133,26 +171,75 @@ class VVVVIDIE(InfoExtractor):
|
||||
kenc_message = kenc.get('message')
|
||||
if kenc_message:
|
||||
embed_code += '?' + ds(kenc_message)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
embed_code, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_akamai_formats(embed_code, video_id))
|
||||
else:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
||||
metadata_from_url(embed_code)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
metadata_from_url(video_data.get('thumbnail'))
|
||||
info.update(self._extract_common_video_info(video_data))
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
'series': video_data.get('show_title'),
|
||||
'season_id': season_id,
|
||||
'season_number': video_data.get('season_number'),
|
||||
'episode_id': str_or_none(video_data.get('id')),
|
||||
'episode_number': int_or_none(video_data.get('number')),
|
||||
'episode_title': video_data['title'],
|
||||
'episode': title,
|
||||
'view_count': int_or_none(video_data.get('views')),
|
||||
'like_count': int_or_none(video_data.get('video_likes')),
|
||||
}
|
||||
'repost_count': int_or_none(video_data.get('video_shares')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class VVVVIDShowIE(VVVVIDIE):
    # Playlist extractor: expands a show URL into one 'url' entry per playable
    # episode, each entry being delegated to VVVVIDIE.
    _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.vvvvid.it/show/156/psyco-pass',
        'info_dict': {
            'id': '156',
            'title': 'Psycho-Pass',
            'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
        },
        'playlist_count': 46,
    }, {
        'url': 'https://www.vvvvid.it/show/156',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result containing every playable episode of the show.

        The season list is mandatory (a failure there is fatal); the show
        metadata is optional and only supplies the playlist title and
        description.
        """
        base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()

        seasons = self._download_info(
            show_id, 'seasons/', show_title)

        # This request is non-fatal and may therefore yield None; fall back to
        # an empty dict so the playlist is still returned without metadata.
        show_info = self._download_info(
            show_id, 'info/', show_title, fatal=False) or {}

        # VVVVIDIE._VALID_URL requires a title path segment between the show id
        # and the season id. When the show URL carries none, append a
        # placeholder so the generated episode URLs stay matchable.
        if not show_title:
            base_url += '/title'

        entries = []
        for season in (seasons or []):
            episodes = season.get('episodes') or []
            for episode in episodes:
                # Only an explicit False excludes an episode; a missing flag
                # is treated as playable.
                if episode.get('playable') is False:
                    continue
                season_id = str_or_none(episode.get('season_id'))
                video_id = str_or_none(episode.get('video_id'))
                # Both ids are needed to build the episode URL.
                if not (season_id and video_id):
                    continue
                info = self._extract_common_video_info(episode)
                info.update({
                    '_type': 'url',
                    'ie_key': VVVVIDIE.ie_key(),
                    'url': '/'.join([base_url, season_id, video_id]),
                    'title': episode.get('title'),
                    'description': episode.get('description'),
                    'season_id': season_id,
                })
                entries.append(info)

        return self.playlist_result(
            entries, show_id, show_info.get('title'), show_info.get('description'))
|
||||
|
@@ -4,17 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
)
|
||||
|
||||
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
IE_NAME = 'washingtonpost'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/(?:video|posttv)/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
|
||||
'info_dict': {
|
||||
@@ -23,10 +19,15 @@ class WashingtonPostIE(InfoExtractor):
|
||||
'title': 'Egypt finds belongings, debris from plane crash',
|
||||
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
|
||||
'upload_date': '20160520',
|
||||
'uploader': 'Reuters',
|
||||
'timestamp': 1463778452,
|
||||
'timestamp': 1463775187,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.washingtonpost.com/video/world/egypt-finds-belongings-debris-from-plane-crash/2016/05/20/480ba4ee-1ec7-11e6-82c2-a7dcb313287d_video.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.washingtonpost.com/posttv/world/iraq-to-track-down-antiquities-after-islamic-state-museum-rampage/2015/02/28/7c57e916-bf86-11e4-9dfb-03366e719af8_video.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage):
|
||||
@@ -35,73 +36,8 @@ class WashingtonPostIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
|
||||
video_id, transform_source=strip_jsonp)[0]['contentConfig']
|
||||
title = video_data['title']
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
for s in video_data.get('streams', []):
|
||||
s_url = s.get('url')
|
||||
if not s_url or s_url in urls:
|
||||
continue
|
||||
urls.append(s_url)
|
||||
video_type = s.get('type')
|
||||
if video_type == 'smil':
|
||||
continue
|
||||
elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
width = m3u8_format.get('width')
|
||||
if not width:
|
||||
continue
|
||||
vbr = self._search_regex(
|
||||
r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
|
||||
if vbr:
|
||||
m3u8_format.update({
|
||||
'vbr': int_or_none(vbr),
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
else:
|
||||
width = int_or_none(s.get('width'))
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
has_width = width != 0
|
||||
formats.append({
|
||||
'format_id': (
|
||||
'%s-%d-%d' % (video_type, width, vbr)
|
||||
if width
|
||||
else video_type),
|
||||
'vbr': vbr if has_width else None,
|
||||
'width': width,
|
||||
'height': int_or_none(s.get('height')),
|
||||
'acodec': s.get('audioCodec'),
|
||||
'vcodec': s.get('videoCodec') if has_width else 'none',
|
||||
'filesize': int_or_none(s.get('fileSize')),
|
||||
'url': s_url,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
|
||||
})
|
||||
source_media_url = video_data.get('sourceMediaURL')
|
||||
if source_media_url:
|
||||
formats.append({
|
||||
'format_id': 'source_media',
|
||||
'url': source_media_url,
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('blurb'),
|
||||
'uploader': video_data.get('credits', {}).get('source'),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('videoDuration'), 100),
|
||||
'timestamp': int_or_none(
|
||||
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
|
||||
}
|
||||
return self.url_result(
|
||||
'arcpublishing:wapo:' + video_id, 'ArcPublishing', video_id)
|
||||
|
||||
|
||||
class WashingtonPostArticleIE(InfoExtractor):
|
||||
@@ -121,9 +57,8 @@ class WashingtonPostArticleIE(InfoExtractor):
|
||||
'title': 'Breaking Points: The Paper Mine',
|
||||
'duration': 1290,
|
||||
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1395527908,
|
||||
'upload_date': '20140322',
|
||||
'timestamp': 1395440416,
|
||||
'upload_date': '20140321',
|
||||
},
|
||||
}, {
|
||||
'md5': '1fff6a689d8770966df78c8cb6c8c17c',
|
||||
@@ -133,9 +68,8 @@ class WashingtonPostArticleIE(InfoExtractor):
|
||||
'title': 'The town bureaucracy sustains',
|
||||
'description': 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it\'s like to do paperwork 230 feet underground.',
|
||||
'duration': 2220,
|
||||
'timestamp': 1395528005,
|
||||
'upload_date': '20140322',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1395441819,
|
||||
'upload_date': '20140321',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
@@ -151,8 +85,7 @@ class WashingtonPostArticleIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'Washington Post transportation reporter Ashley Halsey III explains why a plane\'s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.',
|
||||
'upload_date': '20141230',
|
||||
'uploader': 'The Washington Post',
|
||||
'timestamp': 1419974765,
|
||||
'timestamp': 1419972442,
|
||||
'title': 'Why black boxes don’t transmit data in real time',
|
||||
}
|
||||
}]
|
||||
|
@@ -5,79 +5,34 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
|
||||
class WistiaBaseIE(InfoExtractor):
|
||||
_VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
|
||||
_VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
|
||||
_EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'md5': 'cafeb56ec0c53c18c97405eecb3133df',
|
||||
'info_dict': {
|
||||
'id': 'sh7fpupwlt',
|
||||
'ext': 'mov',
|
||||
'title': 'Being Resourceful',
|
||||
'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
|
||||
'upload_date': '20131204',
|
||||
'timestamp': 1386185018,
|
||||
'duration': 117,
|
||||
},
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# https://wistia.com/support/embed-and-share/video-on-your-website
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = WistiaIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
for match in re.finditer(
|
||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||
urls.append(unescapeHTML(match.group('url')))
|
||||
for match in re.finditer(
|
||||
r'''(?sx)
|
||||
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
|
||||
''', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data_json = self._download_json(
|
||||
self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
|
||||
# Some videos require this.
|
||||
headers={
|
||||
'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
|
||||
def _download_embed_config(self, config_type, config_id, referer):
|
||||
base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
|
||||
embed_config = self._download_json(
|
||||
base_url + '.json', config_id, headers={
|
||||
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
|
||||
})
|
||||
|
||||
if data_json.get('error'):
|
||||
if isinstance(embed_config, dict) and embed_config.get('error'):
|
||||
raise ExtractorError(
|
||||
'Error while getting the playlist', expected=True)
|
||||
|
||||
data = data_json['media']
|
||||
return embed_config
|
||||
|
||||
def _extract_media(self, embed_config):
|
||||
data = embed_config['media']
|
||||
video_id = data['hashedId']
|
||||
title = data['name']
|
||||
|
||||
formats = []
|
||||
@@ -160,3 +115,85 @@ class WistiaIE(InfoExtractor):
|
||||
'timestamp': int_or_none(data.get('createdAt')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class WistiaIE(WistiaBaseIE):
    # Single-media extractor: accepts 'wistia:<id>' pseudo URLs as well as
    # iframe/medias embed URLs.
    _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)

    _TESTS = [{
        # with hls video
        'url': 'wistia:807fafadvk',
        'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
        'info_dict': {
            'id': '807fafadvk',
            'ext': 'mp4',
            'title': 'Drip Brennan Dunn Workshop',
            'description': 'a JV Webinars video',
            'upload_date': '20160518',
            'timestamp': 1463607249,
            'duration': 4987.11,
        },
    }, {
        'url': 'wistia:sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
        'only_matching': True,
    }, {
        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
        'only_matching': True,
    }]

    # https://wistia.com/support/embed-and-share/video-on-your-website
    @staticmethod
    def _extract_url(webpage):
        """Return the first Wistia URL found in webpage, or None if there is none."""
        found = WistiaIE._extract_urls(webpage)
        if not found:
            return None
        return found[0]

    @staticmethod
    def _extract_urls(webpage):
        """Collect every Wistia reference embedded in webpage.

        Results are ordered by pattern (direct embed URLs first, then async
        div embeds, then id attributes / Wistia.embed() calls) and, within
        each pattern, by position in the page. Duplicates are not removed.
        """
        # Direct embed URLs in <meta content>, <iframe src> or <script src>.
        embed_urls = [
            unescapeHTML(found.group('url'))
            for found in re.finditer(
                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)]
        # Async embeds: <div class="... wistia_async_<id> ...">.
        embed_urls.extend(
            'wistia:%s' % found.group('id')
            for found in re.finditer(
                r'''(?sx)
                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
                ''', webpage))
        # data-wistia-id attributes, Wistia.embed('<id>') calls and id="wistia_<id>".
        embed_urls.extend(
            'wistia:%s' % found.group('id')
            for found in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage))
        return embed_urls

    def _real_extract(self, url):
        """Download the media embed config for the matched id and extract it."""
        media_id = self._match_id(url)
        return self._extract_media(
            self._download_embed_config('media', media_id, url))
|
||||
|
||||
|
||||
class WistiaPlaylistIE(WistiaBaseIE):
    # Playlist extractor for Wistia 'playlists/<id>' embed URLs.
    _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)

    _TEST = {
        'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
        'info_dict': {
            'id': 'aodt9etokc',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Return a playlist built from the medias of the first playlist section.

        Medias lacking an 'embed_config' are skipped; a missing or malformed
        'medias' list yields an empty playlist.
        """
        playlist_id = self._match_id(url)
        playlist = self._download_embed_config('playlist', playlist_id, url)

        medias = try_get(playlist, lambda x: x[0]['medias']) or []
        # Lazily pair each media with its config so extraction order is kept.
        embed_configs = (media.get('embed_config') for media in medias)
        entries = [
            self._extract_media(embed_config)
            for embed_config in embed_configs
            if embed_config]

        return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -1,23 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class YandexDiskIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
yadi\.sk|
|
||||
disk\.yandex\.
|
||||
(?:
|
||||
az|
|
||||
by|
|
||||
co(?:m(?:\.(?:am|ge|tr))?|\.il)|
|
||||
ee|
|
||||
fr|
|
||||
k[gz]|
|
||||
l[tv]|
|
||||
md|
|
||||
t[jm]|
|
||||
u[az]|
|
||||
ru
|
||||
)
|
||||
)/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
|
||||
'md5': '33955d7ae052f15853dc41f35f17581c',
|
||||
'md5': 'a4a8d52958c8fddcf9845935070402ae',
|
||||
'info_dict': {
|
||||
'id': 'VdOeDou8eZs6Y',
|
||||
'ext': 'mp4',
|
||||
@@ -27,92 +47,101 @@ class YandexDiskIE(InfoExtractor):
|
||||
'uploader_id': '300043621',
|
||||
'view_count': int,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
status = self._download_webpage(
|
||||
'https://disk.yandex.com/auth/status', video_id, query={
|
||||
'urlOrigin': url,
|
||||
'source': 'public',
|
||||
'md5': 'false',
|
||||
})
|
||||
|
||||
sk = self._search_regex(
|
||||
r'(["\'])sk(?:External)?\1\s*:\s*(["\'])(?P<value>(?:(?!\2).)+)\2',
|
||||
status, 'sk', group='value')
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
store = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
|
||||
webpage, 'store'), video_id)
|
||||
resource = store['resources'][store['rootResourceId']]
|
||||
|
||||
models = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script[^>]+id=["\']models-client[^>]+>\s*(\[.+?\])\s*</script',
|
||||
webpage, 'video JSON'),
|
||||
video_id)
|
||||
title = resource['name']
|
||||
meta = resource.get('meta') or {}
|
||||
|
||||
data = next(
|
||||
model['data'] for model in models
|
||||
if model.get('model') == 'resource')
|
||||
public_url = meta.get('short_url')
|
||||
if public_url:
|
||||
video_id = self._match_id(public_url)
|
||||
|
||||
video_hash = data['id']
|
||||
title = data['name']
|
||||
source_url = (self._download_json(
|
||||
'https://cloud-api.yandex.net/v1/disk/public/resources/download',
|
||||
video_id, query={'public_key': url}, fatal=False) or {}).get('href')
|
||||
video_streams = resource.get('videoStreams') or {}
|
||||
video_hash = resource.get('hash') or url
|
||||
environment = store.get('environment') or {}
|
||||
sk = environment.get('sk')
|
||||
yandexuid = environment.get('yandexuid')
|
||||
if sk and yandexuid and not (source_url and video_streams):
|
||||
self._set_cookie(domain, 'yandexuid', yandexuid)
|
||||
|
||||
models = self._download_json(
|
||||
'https://disk.yandex.com/models/', video_id,
|
||||
data=urlencode_postdata({
|
||||
'_model.0': 'videoInfo',
|
||||
'id.0': video_hash,
|
||||
'_model.1': 'do-get-resource-url',
|
||||
'id.1': video_hash,
|
||||
'version': '13.6',
|
||||
'sk': sk,
|
||||
}), query={'_m': 'videoInfo'})['models']
|
||||
|
||||
videos = try_get(models, lambda x: x[0]['data']['videos'], list) or []
|
||||
source_url = try_get(
|
||||
models, lambda x: x[1]['data']['file'], compat_str)
|
||||
def call_api(action):
|
||||
return (self._download_json(
|
||||
urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
|
||||
'hash': video_hash,
|
||||
'sk': sk,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'text/plain',
|
||||
}, fatal=False) or {}).get('data') or {}
|
||||
if not source_url:
|
||||
# TODO: figure out how to detect if download limit has
|
||||
# been reached and then avoid unnecessary source format
|
||||
# extraction requests
|
||||
source_url = call_api('download-url').get('url')
|
||||
if not video_streams:
|
||||
video_streams = call_api('get-video-streams')
|
||||
|
||||
formats = []
|
||||
if source_url:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': 'source',
|
||||
'ext': determine_ext(title, 'mp4'),
|
||||
'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
|
||||
'quality': 1,
|
||||
'filesize': int_or_none(meta.get('size'))
|
||||
})
|
||||
for video in videos:
|
||||
|
||||
for video in (video_streams.get('videos') or []):
|
||||
format_url = video.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(format_url) == 'm3u8':
|
||||
if video.get('dimension') == 'adaptive':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
format_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
size = video.get('size') or {}
|
||||
height = int_or_none(size.get('height'))
|
||||
format_id = 'hls'
|
||||
if height:
|
||||
format_id += '-%dp' % height
|
||||
formats.append({
|
||||
'ext': 'mp4',
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'protocol': 'm3u8_native',
|
||||
'url': format_url,
|
||||
'width': int_or_none(size.get('width')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = float_or_none(try_get(
|
||||
models, lambda x: x[0]['data']['duration']), 1000)
|
||||
uploader = try_get(
|
||||
data, lambda x: x['user']['display_name'], compat_str)
|
||||
uploader_id = try_get(
|
||||
data, lambda x: x['user']['uid'], compat_str)
|
||||
view_count = int_or_none(try_get(
|
||||
data, lambda x: x['meta']['views_counter']))
|
||||
uid = resource.get('uid')
|
||||
display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'duration': float_or_none(video_streams.get('duration'), 1000),
|
||||
'uploader': display_name,
|
||||
'uploader_id': uid,
|
||||
'view_count': int_or_none(meta.get('views_counter')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -13,26 +14,30 @@ class YandexVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
|
||||
yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=|
|
||||
frontend\.vh\.yandex\.ru/player/
|
||||
)
|
||||
(?P<id>[\da-f]+)
|
||||
(?P<id>(?:[\da-f]{32}|[\w-]{12}))
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
'md5': '33955d7ae052f15853dc41f35f17581c',
|
||||
'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
|
||||
'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4',
|
||||
'info_dict': {
|
||||
'id': '4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 0,
|
||||
'duration': 30,
|
||||
'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь',
|
||||
'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1549972939,
|
||||
'duration': 5575,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20190212',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
|
||||
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
@@ -52,53 +57,88 @@ class YandexVideoIE(InfoExtractor):
|
||||
# DASH with DRM
|
||||
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
content = self._download_json(
|
||||
'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
|
||||
video_id, query={
|
||||
'stream_options': 'hires',
|
||||
'disable_trackings': 1,
|
||||
})['content']
|
||||
player = try_get((self._download_json(
|
||||
'https://frontend.vh.yandex.ru/graphql', video_id, data=('''{
|
||||
player(content_id: "%s") {
|
||||
computed_title
|
||||
content_url
|
||||
description
|
||||
dislikes
|
||||
duration
|
||||
likes
|
||||
program_title
|
||||
release_date
|
||||
release_date_ut
|
||||
release_year
|
||||
restriction_age
|
||||
season
|
||||
start_time
|
||||
streams
|
||||
thumbnail
|
||||
title
|
||||
views_count
|
||||
}
|
||||
}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content'])
|
||||
if not player or player.get('error'):
|
||||
player = self._download_json(
|
||||
'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
|
||||
video_id, query={
|
||||
'stream_options': 'hires',
|
||||
'disable_trackings': 1,
|
||||
})
|
||||
content = player['content']
|
||||
|
||||
content_url = url_or_none(content.get('content_url')) or url_or_none(
|
||||
content['streams'][0]['url'])
|
||||
title = content.get('title') or content.get('computed_title')
|
||||
title = content.get('title') or content['computed_title']
|
||||
|
||||
ext = determine_ext(content_url)
|
||||
|
||||
if ext == 'm3u8':
|
||||
formats = self._extract_m3u8_formats(
|
||||
content_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
elif ext == 'mpd':
|
||||
formats = self._extract_mpd_formats(
|
||||
content_url, video_id, mpd_id='dash')
|
||||
else:
|
||||
formats = [{'url': content_url}]
|
||||
formats = []
|
||||
streams = content.get('streams') or []
|
||||
streams.append({'url': content.get('content_url')})
|
||||
for stream in streams:
|
||||
content_url = url_or_none(stream.get('url'))
|
||||
if not content_url:
|
||||
continue
|
||||
ext = determine_ext(content_url)
|
||||
if ext == 'ismc':
|
||||
continue
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
content_url, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
content_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({'url': content_url})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = content.get('description')
|
||||
thumbnail = content.get('thumbnail')
|
||||
timestamp = (int_or_none(content.get('release_date'))
|
||||
or int_or_none(content.get('release_date_ut'))
|
||||
or int_or_none(content.get('start_time')))
|
||||
duration = int_or_none(content.get('duration'))
|
||||
series = content.get('program_title')
|
||||
age_limit = int_or_none(content.get('restriction_age'))
|
||||
season = content.get('season') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'description': content.get('description'),
|
||||
'thumbnail': content.get('thumbnail'),
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'series': series,
|
||||
'age_limit': age_limit,
|
||||
'duration': int_or_none(content.get('duration')),
|
||||
'series': content.get('program_title'),
|
||||
'age_limit': int_or_none(content.get('restriction_age')),
|
||||
'view_count': int_or_none(content.get('views_count')),
|
||||
'like_count': int_or_none(content.get('likes')),
|
||||
'dislike_count': int_or_none(content.get('dislikes')),
|
||||
'season_number': int_or_none(season.get('season_number')),
|
||||
'season_id': season.get('id'),
|
||||
'release_year': int_or_none(content.get('release_year')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
|
||||
from ..swfinterp import SWFInterpreter
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -279,6 +280,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
|
||||
def _call_api(self, ep, query, video_id):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
@@ -296,10 +298,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_yt_initial_data(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
|
||||
(r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
|
||||
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
|
||||
video_id)
|
||||
|
||||
def _extract_ytcfg(self, video_id, webpage):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), video_id, fatal=False)
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
@@ -315,7 +323,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
||||
(?:(?:www|dev)\.)?invidio\.us/|
|
||||
(?:(?:www|no)\.)?invidiou\.sh/|
|
||||
(?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
|
||||
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
|
||||
(?:www\.)?invidious\.kabi\.tk/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.mastodon\.host/|
|
||||
@@ -1096,6 +1104,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# another example of '};' in ytInitialData
|
||||
'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -1316,17 +1333,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return self._parse_json(
|
||||
uppercase_escape(config), video_id, fatal=False)
|
||||
|
||||
def _get_automatic_captions(self, video_id, webpage):
|
||||
def _get_automatic_captions(self, video_id, player_response, player_config):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
player_config = self._get_ytplayer_config(video_id, webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
if not player_config:
|
||||
if not (player_response or player_config):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
try:
|
||||
args = player_config['args']
|
||||
args = player_config.get('args') if player_config else {}
|
||||
caption_url = args.get('ttsurl')
|
||||
if caption_url:
|
||||
timestamp = args['timestamp']
|
||||
@@ -1385,19 +1401,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return captions
|
||||
|
||||
# New captions format as of 22.06.2017
|
||||
player_response = args.get('player_response')
|
||||
if player_response and isinstance(player_response, compat_str):
|
||||
player_response = self._parse_json(
|
||||
player_response, video_id, fatal=False)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
@@ -1646,6 +1658,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
ytplayer_config = None
|
||||
|
||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||
age_gate = True
|
||||
@@ -1699,7 +1712,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not video_info and not player_response:
|
||||
player_response = extract_player_response(
|
||||
self._search_regex(
|
||||
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||
(r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
|
||||
'initial player response', default='{}'),
|
||||
video_id)
|
||||
@@ -2270,7 +2283,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
|
||||
|
||||
video_duration = try_get(
|
||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||
@@ -2283,16 +2296,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# annotations
|
||||
video_annotations = None
|
||||
if self._downloader.params.get('writeannotations', False):
|
||||
xsrf_token = self._search_regex(
|
||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
|
||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||
xsrf_token = None
|
||||
ytcfg = self._extract_ytcfg(video_id, video_webpage)
|
||||
if ytcfg:
|
||||
xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
|
||||
if not xsrf_token:
|
||||
xsrf_token = self._search_regex(
|
||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
|
||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||
invideo_url = try_get(
|
||||
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
|
||||
if xsrf_token and invideo_url:
|
||||
xsrf_field_name = self._search_regex(
|
||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||
video_webpage, 'xsrf field name',
|
||||
group='xsrf_field_name', default='session_token')
|
||||
xsrf_field_name = None
|
||||
if ytcfg:
|
||||
xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
|
||||
if not xsrf_field_name:
|
||||
xsrf_field_name = self._search_regex(
|
||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||
video_webpage, 'xsrf field name',
|
||||
group='xsrf_field_name', default='session_token')
|
||||
video_annotations = self._download_webpage(
|
||||
self._proto_relative_url(invideo_url),
|
||||
video_id, note='Downloading annotations',
|
||||
@@ -2431,7 +2453,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
)/
|
||||
(?:
|
||||
(?:channel|c|user|feed)/|
|
||||
(?:playlist|watch)\?.*?\blist=
|
||||
(?:playlist|watch)\?.*?\blist=|
|
||||
(?!(?:watch|embed|v|e)\b)
|
||||
)
|
||||
(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
@@ -2700,13 +2723,27 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
# inline playlist with not always working continuations
|
||||
'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
|
||||
'only_matching': True,
|
||||
}
|
||||
# TODO
|
||||
# {
|
||||
# 'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
# 'only_matching': True,
|
||||
# }
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/course',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/zsecurity',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.youtube.com/NASAgovVideo/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that ``YoutubeIE`` reports as suitable is rejected here;
    every other URL is decided by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
|
||||
|
||||
def _extract_channel_id(self, webpage):
|
||||
channel_id = self._html_search_meta(
|
||||
@@ -2988,10 +3025,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
for page_num in itertools.count(1):
|
||||
if not continuation:
|
||||
break
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/browse_ajax', None,
|
||||
'Downloading page %d' % page_num,
|
||||
headers=headers, query=continuation, fatal=False)
|
||||
count = 0
|
||||
retries = 3
|
||||
while count <= retries:
|
||||
try:
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# that is usually worked around with a retry
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/browse_ajax', None,
|
||||
'Downloading page %d%s'
|
||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||
headers=headers, query=continuation)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||
count += 1
|
||||
if count <= retries:
|
||||
continue
|
||||
raise
|
||||
if not browse:
|
||||
break
|
||||
response = try_get(browse, lambda x: x[1]['response'], dict)
|
||||
@@ -3130,10 +3181,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
playlist_title=title)
|
||||
|
||||
def _extract_identity_token(self, webpage, item_id):
|
||||
ytcfg = self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), item_id, fatal=False)
|
||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||
if ytcfg:
|
||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||
if token:
|
||||
|
@@ -85,7 +85,13 @@ class ZypeIE(InfoExtractor):
|
||||
else:
|
||||
m3u8_url = self._search_regex(
|
||||
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
|
||||
body, 'm3u8 url', group='url')
|
||||
body, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
source = self._parse_json(self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
|
||||
'source'), video_id, js_to_json)
|
||||
if source.get('integration') == 'verizon-media':
|
||||
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
text_tracks = self._search_regex(
|
||||
|
@@ -3640,7 +3640,7 @@ def url_or_none(url):
|
||||
if not url or not isinstance(url, compat_str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
|
||||
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2020.12.14'
|
||||
__version__ = '2021.01.03'
|
||||
|
Reference in New Issue
Block a user