mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 14:18:37 +09:00
Compare commits
85 Commits
76fe4ba3b2
...
2020.12.31
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4066945919 | ||
![]() |
2a84694b1e | ||
![]() |
4046ffe1e1 | ||
![]() |
d1d0612160 | ||
![]() |
7b0f04ed1f | ||
![]() |
2e21b06ea2 | ||
![]() |
a6f75e6e89 | ||
![]() |
bd18824c2a | ||
![]() |
bdd044e67b | ||
![]() |
f7e95fb2a0 | ||
![]() |
9dd674e1d2 | ||
![]() |
9c1e164e0c | ||
![]() |
c706fbe9fe | ||
![]() |
ebdcf70b0d | ||
![]() |
5966095e65 | ||
![]() |
9ee984fc76 | ||
![]() |
53528e1d23 | ||
![]() |
c931c4b8dd | ||
![]() |
7acd042bbb | ||
![]() |
bcfe485e01 | ||
![]() |
479cc6d5a1 | ||
![]() |
38286ee729 | ||
![]() |
1a95953867 | ||
![]() |
71febd1c52 | ||
![]() |
f1bc56c99b | ||
![]() |
64e419bd73 | ||
![]() |
782ea947b4 | ||
![]() |
f27224d57b | ||
![]() |
c007188598 | ||
![]() |
af93ecfd88 | ||
![]() |
794771a164 | ||
![]() |
6f2eaaf73d | ||
![]() |
4c7a4dbc4d | ||
![]() |
f86b299d0e | ||
![]() |
e474996541 | ||
![]() |
aed617e311 | ||
![]() |
0fa67c1d68 | ||
![]() |
365b3cc72d | ||
![]() |
a272fe21a8 | ||
![]() |
cec1c2f211 | ||
![]() |
12053450dc | ||
![]() |
46cffb0c47 | ||
![]() |
c32a059f52 | ||
![]() |
6911312e53 | ||
![]() |
f22b5a6b96 | ||
![]() |
58e55198c1 | ||
![]() |
d61ed9f2f1 | ||
![]() |
8bc4c6350e | ||
![]() |
cfa4ffa23b | ||
![]() |
4f1dc1463d | ||
![]() |
17e0f41d34 | ||
![]() |
b57b27ff8f | ||
![]() |
bbe8cc6662 | ||
![]() |
98106accb6 | ||
![]() |
af1312bfc3 | ||
![]() |
4c7d7215cd | ||
![]() |
0370d9eb3d | ||
![]() |
1434651d20 | ||
![]() |
2c312ab84a | ||
![]() |
0ee78d62d5 | ||
![]() |
7f3c90ab25 | ||
![]() |
1d3cd29730 | ||
![]() |
4ef1fc9707 | ||
![]() |
f9e6aa1dcf | ||
![]() |
f83db9064b | ||
![]() |
2da9a86399 | ||
![]() |
ecaa535cf4 | ||
![]() |
79dd92b1fe | ||
![]() |
bd3844c9c2 | ||
![]() |
7bf5e3a84a | ||
![]() |
08a17dae5b | ||
![]() |
924ea66ade | ||
![]() |
5b72f5b74f | ||
![]() |
bfa345744d | ||
![]() |
f966461476 | ||
![]() |
b8aea53682 | ||
![]() |
c0d9eb7043 | ||
![]() |
3ba6aabd25 | ||
![]() |
a8b31505ed | ||
![]() |
90a271e914 | ||
![]() |
172d270607 | ||
![]() |
22feed08a1 | ||
![]() |
942b8ca3be | ||
![]() |
3729c52f9d | ||
![]() |
71679eaee8 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.31. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.31**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.12.14
|
||||
[debug] youtube-dl version 2020.12.31
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.31. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, none of the provided example URLs may violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.31**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.31. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.31**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.31. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.31**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.12.14
|
||||
[debug] youtube-dl version 2020.12.31
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.31. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.31**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
50
.github/workflows/ci.yml
vendored
Normal file
50
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
name: CI
|
||||
on: [push]
|
||||
jobs:
|
||||
tests:
|
||||
name: Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
# TODO: python 2.6
|
||||
python-version: [2.7, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, pypy-2.7, pypy-3.6, pypy-3.7]
|
||||
ytdl-test-set: [core, download]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# python 3.2 is only available on windows via setup-python
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
ytdl-test-set: core
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: 3.2
|
||||
ytdl-test-set: download
|
||||
run-tests-ext: bat
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install nose
|
||||
run: pip install nose
|
||||
- name: Run tests
|
||||
continue-on-error: ${{ matrix.ytdl-test-set == 'download' }}
|
||||
env:
|
||||
YTDL_TEST_SET: ${{ matrix.ytdl-test-set }}
|
||||
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }}
|
||||
flake8:
|
||||
name: Linter
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install flake8
|
||||
run: pip install flake8
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
106
ChangeLog
106
ChangeLog
@@ -1,3 +1,109 @@
|
||||
version 2020.12.31
|
||||
|
||||
Core
|
||||
* [utils] Accept only supported protocols in url_or_none
|
||||
* [YoutubeDL] Allow format filtering using audio language (#16209)
|
||||
|
||||
Extractors
|
||||
+ [redditr] Extract all thumbnails (#27503)
|
||||
* [vvvvid] Improve info extraction
|
||||
+ [vvvvid] Add support for playlists (#18130, #27574)
|
||||
+ [yandexdisk] Extract info from webpage
|
||||
* [yandexdisk] Fix extraction (#17861, #27131)
|
||||
* [yandexvideo] Use old API call as fallback
|
||||
* [yandexvideo] Fix extraction (#25000)
|
||||
- [nbc] Remove CSNNE extractor
|
||||
* [nbc] Fix NBCSport VPlayer URL extraction (#16640)
|
||||
+ [aenetworks] Add support for biography.com (#3863)
|
||||
* [uktvplay] Match new video URLs (#17909)
|
||||
* [sevenplay] Detect API errors
|
||||
* [tenplay] Fix format extraction (#26653)
|
||||
* [brightcove] Raise error for DRM protected videos (#23467, #27568)
|
||||
|
||||
|
||||
version 2020.12.29
|
||||
|
||||
Extractors
|
||||
* [youtube] Improve yt initial data extraction (#27524)
|
||||
* [youtube:tab] Improve URL matching (#27559)
|
||||
* [youtube:tab] Restore retry on browse requests (#27313, #27564)
|
||||
* [aparat] Fix extraction (#22285, #22611, #23348, #24354, #24591, #24904,
|
||||
#25418, #26070, #26350, #26738, #27563)
|
||||
- [brightcove] Remove sonyliv specific code
|
||||
* [piksel] Improve format extraction
|
||||
+ [zype] Add support for uplynk videos
|
||||
+ [toggle] Add support for live.mewatch.sg (#27555)
|
||||
+ [go] Add support for fxnow.fxnetworks.com (#13972, #22467, #23754, #26826)
|
||||
* [teachable] Improve embed detection (#26923)
|
||||
* [mitele] Fix free video extraction (#24624, #25827, #26757)
|
||||
* [telecinco] Fix extraction
|
||||
* [youtube] Update invidious.snopyta.org (#22667)
|
||||
* [amcnetworks] Improve auth only video detection (#27548)
|
||||
+ [generic] Add support for VHX Embeds (#27546)
|
||||
|
||||
|
||||
version 2020.12.26
|
||||
|
||||
Extractors
|
||||
* [instagram] Fix comment count extraction
|
||||
+ [instagram] Add support for reel URLs (#26234, #26250)
|
||||
* [bbc] Switch to media selector v6 (#23232, #23933, #26303, #26432, #26821,
|
||||
#27538)
|
||||
* [instagram] Improve thumbnail extraction
|
||||
* [instagram] Fix extraction when authenticated (#22880, #26377, #26981,
|
||||
#27422)
|
||||
* [spankbang:playlist] Fix extraction (#24087)
|
||||
+ [spankbang] Add support for playlist videos
|
||||
* [pornhub] Improve like and dislike count extraction (#27356)
|
||||
* [pornhub] Fix lq formats extraction (#27386, #27393)
|
||||
+ [bongacams] Add support for bongacams.com (#27440)
|
||||
* [youtube:tab] Extend URL regular expression (#27501)
|
||||
* [theweatherchannel] Fix extraction (#25930, #26051)
|
||||
+ [sprout] Add support for Universal Kids (#22518)
|
||||
* [theplatform] Allow passing geo bypass countries from other extractors
|
||||
+ [wistia] Add support for playlists (#27533)
|
||||
+ [ctv] Add support for ctv.ca (#27525)
|
||||
* [9c9media] Improve info extraction
|
||||
* [youtube] Fix automatic captions extraction (#27162, #27388)
|
||||
* [sonyliv] Fix title for movies
|
||||
* [sonyliv] Fix extraction (#25667)
|
||||
* [streetvoice] Fix extraction (#27455, #27492)
|
||||
+ [facebook] Add support for watchparty pages (#27507)
|
||||
* [cbslocal] Fix video extraction
|
||||
+ [brightcove] Add another method to extract policyKey
|
||||
* [mewatch] Relax URL regular expression (#27506)
|
||||
|
||||
|
||||
version 2020.12.22
|
||||
|
||||
Core
|
||||
* [common] Remove unwanted query params from unsigned akamai manifest URLs
|
||||
|
||||
Extractors
|
||||
- [tastytrade] Remove extractor (#25716)
|
||||
* [niconico] Fix playlist extraction (#27428)
|
||||
- [everyonesmixtape] Remove extractor
|
||||
- [kanalplay] Remove extractor
|
||||
* [arkena] Fix extraction
|
||||
* [nba] Rewrite extractor
|
||||
* [turner] Improve info extraction
|
||||
* [youtube] Improve xsrf token extraction (#27442)
|
||||
* [generic] Improve RSS age limit extraction
|
||||
* [generic] Fix RSS itunes thumbnail extraction (#27405)
|
||||
+ [redditr] Extract duration (#27426)
|
||||
- [zaq1] Remove extractor
|
||||
+ [asiancrush] Add support for retrocrush.tv
|
||||
* [asiancrush] Fix extraction
|
||||
- [noco] Remove extractor (#10864)
|
||||
* [nfl] Fix extraction (#22245)
|
||||
* [skysports] Relax URL regular expression (#27435)
|
||||
+ [tv5unis] Add support for tv5unis.ca (#22399, #24890)
|
||||
+ [videomore] Add support for more.tv (#27088)
|
||||
+ [yandexmusic] Add support for music.yandex.com (#27425)
|
||||
+ [nhk:program] Add support for audio programs and program clips
|
||||
+ [nhk] Add support for NHK video programs (#27230)
|
||||
|
||||
|
||||
version 2020.12.14
|
||||
|
||||
Core
|
||||
|
@@ -1,4 +1,5 @@
|
||||
[Build Status](https://travis-ci.com/ytdl-org/youtube-dl)
|
||||
[Build Status](https://github.com/ytdl-org/youtube-dl/actions?query=workflow%3ACI)
|
||||
|
||||
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
@@ -677,6 +678,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
|
||||
- `container`: Name of the container format
|
||||
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
|
||||
- `format_id`: A short description of the format
|
||||
- `language`: Language code
|
||||
|
||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
|
||||
|
||||
@@ -879,7 +881,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [Get cookies.txt](https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
|
17
devscripts/run_tests.bat
Normal file
17
devscripts/run_tests.bat
Normal file
@@ -0,0 +1,17 @@
|
||||
@echo off
|
||||
|
||||
rem Keep this list in sync with the `offlinetest` target in Makefile
|
||||
set DOWNLOAD_TESTS="age_restriction^|download^|iqiyi_sdk_interpreter^|socks^|subtitles^|write_annotations^|youtube_lists^|youtube_signature"
|
||||
|
||||
if "%YTDL_TEST_SET%" == "core" (
|
||||
set test_set="-I test_("%DOWNLOAD_TESTS%")\.py"
|
||||
set multiprocess_args=""
|
||||
) else if "%YTDL_TEST_SET%" == "download" (
|
||||
set test_set="-I test_(?!"%DOWNLOAD_TESTS%").+\.py"
|
||||
set multiprocess_args="--processes=4 --process-timeout=540"
|
||||
) else (
|
||||
echo YTDL_TEST_SET is not set or invalid
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
nosetests test --verbose %test_set:"=% %multiprocess_args:"=%
|
@@ -104,6 +104,7 @@
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BiliBiliPlayer**
|
||||
- **BioBioChileTV**
|
||||
- **Biography**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
@@ -112,6 +113,7 @@
|
||||
- **blinkx**
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BongaCams**
|
||||
- **BostonGlobe**
|
||||
- **Box**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
@@ -146,6 +148,7 @@
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **CBSLocalArticle**
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
@@ -195,9 +198,9 @@
|
||||
- **CrooksAndLiars**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **cu.ntv.co.jp**: Nippon Television Network
|
||||
- **Culturebox**
|
||||
@@ -268,7 +271,6 @@
|
||||
- **ESPNArticle**
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
@@ -315,7 +317,6 @@
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameSpot**
|
||||
@@ -348,6 +349,7 @@
|
||||
- **hgtv.com:show**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
- **history:topic**: History.com Topic
|
||||
- **hitbox**
|
||||
- **hitbox:live**
|
||||
@@ -400,7 +402,6 @@
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
- **KanalPlay**: Kanal 5/9/11 Play
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **KarriereVideos**
|
||||
@@ -541,6 +542,11 @@
|
||||
- **NationalGeographicTV**
|
||||
- **Naver**
|
||||
- **NBA**
|
||||
- **nba:watch**
|
||||
- **nba:watch:collection**
|
||||
- **NBAChannel**
|
||||
- **NBAEmbed**
|
||||
- **NBAWatchEmbed**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **nbcolympics**
|
||||
@@ -570,8 +576,10 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **nfl.com**
|
||||
- **nfl.com** (Currently broken)
|
||||
- **nfl.com:article** (Currently broken)
|
||||
- **NhkVod**
|
||||
- **NhkVodProgram**
|
||||
- **nhl.com**
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
@@ -585,7 +593,6 @@
|
||||
- **njoy:embed**
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **NonkTube**
|
||||
- **Noovo**
|
||||
- **Normalboots**
|
||||
@@ -872,7 +879,6 @@
|
||||
- **Tagesschau**
|
||||
- **tagesschau:player**
|
||||
- **Tass**
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
@@ -946,6 +952,8 @@
|
||||
- **TV2DKBornholmPlay**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **tv5unis**
|
||||
- **tv5unis:video**
|
||||
- **tv8.it**
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
@@ -1081,6 +1089,7 @@
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VVVVIDShow**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Wakanim**
|
||||
@@ -1103,6 +1112,7 @@
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **Wistia**
|
||||
- **WistiaPlaylist**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **WorldStarHipHop**
|
||||
- **WSJ**: Wall Street Journal
|
||||
@@ -1165,7 +1175,6 @@
|
||||
- **YoutubeYtBe**
|
||||
- **YoutubeYtUser**
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **Zattoo**
|
||||
- **ZattooLive**
|
||||
- **ZDF**
|
||||
|
@@ -36,7 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
@@ -57,8 +57,8 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
# def test_youtube_user_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||
|
@@ -554,6 +554,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(url_or_none('http$://foo.de'), None)
|
||||
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
|
||||
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
self.assertEqual(url_or_none('s3://foo.de'), None)
|
||||
self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
|
||||
self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
|
||||
self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
|
||||
self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
|
@@ -1083,7 +1083,7 @@ class YoutubeDL(object):
|
||||
'*=': lambda attr, value: value in attr,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
|
||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||
\s*$
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
@@ -28,6 +29,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
'biography.com': (None, 'biography'),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
@@ -54,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e, GeoRestrictedError):
|
||||
raise
|
||||
last_e = e
|
||||
continue
|
||||
formats.extend(tp_formats)
|
||||
@@ -67,6 +71,34 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
@@ -139,32 +171,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
canonical, query={'filter[canonical]': '/' + canonical})['results'][0]
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
|
||||
|
||||
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
@@ -294,3 +301,42 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
return self.url_result(
|
||||
'http://www.history.com/videos/' + display_id,
|
||||
AENetworksIE.ie_key())
|
||||
|
||||
|
||||
class HistoryPlayerIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:player'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_aetn_info(domain, 'id', video_id, url)
|
||||
|
||||
|
||||
class BiographyIE(AENetworksBaseIE):
    """Extractor for biography.com video pages.

    The page is downloaded only to locate the ``<phoenix-iframe>`` player URL;
    the result is handed over to ``HistoryPlayerIE``.
    """

    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Find the embedded player URL in the page and return it as a URL result."""
        display_id = self._match_id(url)
        page_html = self._download_webpage(url, display_id)
        # The iframe pattern embeds the player extractor's own URL regex.
        iframe_src_re = r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL
        embedded_url = self._search_regex(iframe_src_re, page_html, 'player URL')
        return self.url_result(embedded_url, HistoryPlayerIE.ie_key())
|
||||
|
@@ -80,7 +80,8 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
if properties.get('videoCategory') == 'TVE-Auth':
|
||||
video_category = properties.get('videoCategory')
|
||||
if video_category and video_category.endswith('-Auth'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
|
@@ -9,7 +9,6 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
# from .anvato_token_generator import NFLTokenGenerator
|
||||
from ..aes import aes_encrypt
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
@@ -204,10 +203,6 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_TOKEN_GENERATORS = {
|
||||
# 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
@@ -267,12 +262,9 @@ class AnvatoIE(InfoExtractor):
|
||||
'anvrid': anvrid,
|
||||
'anvts': server_time,
|
||||
}
|
||||
if access_key in self._TOKEN_GENERATORS:
|
||||
api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
|
||||
else:
|
||||
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp,
|
||||
|
@@ -1,7 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .nfl import NFLTokenGenerator
|
||||
|
||||
__all__ = [
|
||||
'NFLTokenGenerator',
|
||||
]
|
@@ -1,6 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
class TokenGenerator:
    """Base class for token generators; subclasses supply ``generate``."""

    def generate(self, anvack, mcp_id):
        """Produce a token for *anvack* / *mcp_id*.

        The base implementation does nothing useful and always raises
        NotImplementedError.
        """
        message = 'This method must be implemented by subclasses'
        raise NotImplementedError(message)
|
@@ -1,30 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import TokenGenerator
|
||||
|
||||
|
||||
class NFLTokenGenerator(TokenGenerator):
    """Token generator that obtains a media token from api.nfl.com."""

    # "<token_type> <access_token>" header value; fetched on first use and
    # then kept on the class so later calls skip the reroute request.
    _AUTHORIZATION = None

    @staticmethod
    def generate(ie, anvack, mcp_id):
        """Return the media token for *mcp_id* under access key *anvack*.

        ``ie`` is the extractor whose ``_download_json`` performs the HTTP
        requests. The method takes no instance: it is meant to be invoked on
        the class with the extractor as first argument. Declaring it a
        ``staticmethod`` makes that call valid on Python 2 as well, where a
        plain function accessed through the class is an unbound method and
        rejects a first argument that is not an instance of the class.

        Raises KeyError if either response lacks the expected fields.
        """
        if not NFLTokenGenerator._AUTHORIZATION:
            reroute = ie._download_json(
                'https://api.nfl.com/v1/reroute', mcp_id,
                data=b'grant_type=client_credentials',
                headers={'X-Domain-Id': 100})
            NFLTokenGenerator._AUTHORIZATION = '%s %s' % (
                reroute.get('token_type') or 'Bearer', reroute['access_token'])
        return ie._download_json(
            'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
                'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id),
            }).encode(), headers={
                'Authorization': NFLTokenGenerator._AUTHORIZATION,
                'Content-Type': 'application/json',
            })['data']['viewer']['mediaToken']['token']
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
@@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
options = self._parse_json(self._search_regex(
|
||||
r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
|
||||
|
||||
formats = []
|
||||
for sources in player['multiSRC']:
|
||||
for sources in (options.get('multiSRC') or []):
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
@@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
info['title'] = get_element_by_id('videoTitle', webpage) or \
|
||||
self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'duration': int_or_none(options.get('duration')),
|
||||
'formats': formats,
|
||||
})
|
||||
|
@@ -6,13 +6,11 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
strip_jsonp,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,22 +18,27 @@ class ArkenaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
video\.arkena\.com/play2/embed/player\?|
|
||||
video\.(?:arkena|qbrick)\.com/play2/embed/player\?|
|
||||
play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
|
||||
'url': 'https://video.qbrick.com/play2/embed/player?accountId=1034090&mediaId=d8ab4607-00090107-aab86310',
|
||||
'md5': '97f117754e5f3c020f5f26da4a44ebaf',
|
||||
'info_dict': {
|
||||
'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
|
||||
'id': 'd8ab4607-00090107-aab86310',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny',
|
||||
'description': 'Royalty free test video',
|
||||
'timestamp': 1432816365,
|
||||
'upload_date': '20150528',
|
||||
'is_live': False,
|
||||
'title': 'EM_HT20_117_roslund_v2.mp4',
|
||||
'timestamp': 1608285912,
|
||||
'upload_date': '20201218',
|
||||
'duration': 1429.162667,
|
||||
'subtitles': {
|
||||
'sv': 'count:3',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
|
||||
'only_matching': True,
|
||||
@@ -72,62 +75,89 @@ class ArkenaIE(InfoExtractor):
|
||||
if not video_id or not account_id:
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
|
||||
% (video_id, account_id),
|
||||
video_id, transform_source=strip_jsonp)['Playlist'][0]
|
||||
media = self._download_json(
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
})
|
||||
metadata = media.get('metadata') or {}
|
||||
title = metadata['title']
|
||||
|
||||
media_info = playlist['MediaInfo']
|
||||
title = media_info['Title']
|
||||
media_files = playlist['MediaFiles']
|
||||
|
||||
is_live = False
|
||||
duration = None
|
||||
formats = []
|
||||
for kind_case, kind_formats in media_files.items():
|
||||
kind = kind_case.lower()
|
||||
for f in kind_formats:
|
||||
f_url = f.get('Url')
|
||||
if not f_url:
|
||||
continue
|
||||
is_live = f.get('Live') == 'true'
|
||||
exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None))
|
||||
if kind == 'm3u8' or 'm3u8' in exts:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=kind, fatal=False, live=is_live))
|
||||
elif kind == 'flash' or 'f4m' in exts:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f_url, video_id, f4m_id=kind, fatal=False))
|
||||
elif kind == 'dash' or 'mpd' in exts:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
f_url, video_id, mpd_id=kind, fatal=False))
|
||||
elif kind == 'silverlight':
|
||||
# TODO: process when ism is supported (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8118)
|
||||
continue
|
||||
else:
|
||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
|
||||
'tbr': tbr,
|
||||
})
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
for resource in media['asset']['resources']:
|
||||
for rendition in (resource.get('renditions') or []):
|
||||
rendition_type = rendition.get('type')
|
||||
for i, link in enumerate(rendition.get('links') or []):
|
||||
href = link.get('href')
|
||||
if not href:
|
||||
continue
|
||||
if rendition_type == 'image':
|
||||
thumbnails.append({
|
||||
'filesize': int_or_none(rendition.get('size')),
|
||||
'height': int_or_none(rendition.get('height')),
|
||||
'id': rendition.get('id'),
|
||||
'url': href,
|
||||
'width': int_or_none(rendition.get('width')),
|
||||
})
|
||||
elif rendition_type == 'subtitle':
|
||||
subtitles.setdefault(rendition.get('language') or 'en', []).append({
|
||||
'url': href,
|
||||
})
|
||||
elif rendition_type == 'video':
|
||||
f = {
|
||||
'filesize': int_or_none(rendition.get('size')),
|
||||
'format_id': rendition.get('id'),
|
||||
'url': href,
|
||||
}
|
||||
video = try_get(rendition, lambda x: x['videos'][i], dict)
|
||||
if video:
|
||||
if not duration:
|
||||
duration = float_or_none(video.get('duration'))
|
||||
f.update({
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'vcodec': video.get('codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
})
|
||||
audio = try_get(video, lambda x: x['audios'][0], dict)
|
||||
if audio:
|
||||
f.update({
|
||||
'acodec': audio.get('codec'),
|
||||
'asr': int_or_none(audio.get('sampleRate')),
|
||||
})
|
||||
formats.append(f)
|
||||
elif rendition_type == 'index':
|
||||
mime_type = link.get('mimeType')
|
||||
if mime_type == 'application/smil+xml':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
href, video_id, fatal=False))
|
||||
elif mime_type == 'application/x-mpegURL':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif mime_type == 'application/hds+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, video_id, f4m_id='hds', fatal=False))
|
||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||
formats.extend(self._extract_ism_formats(
|
||||
href, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = media_info.get('Description')
|
||||
video_id = media_info.get('VideoId') or video_id
|
||||
timestamp = parse_iso8601(media_info.get('PublishDate'))
|
||||
thumbnails = [{
|
||||
'url': thumbnail['Url'],
|
||||
'width': int_or_none(thumbnail.get('Size')),
|
||||
} for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'is_live': is_live,
|
||||
'description': metadata.get('description'),
|
||||
'timestamp': parse_iso8601(media.get('created')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'duration': duration,
|
||||
'tags': media.get('tags'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
_NETRC_MACHINE = 'bbc'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
_MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
|
||||
_MEDIA_SETS = [
|
||||
# Provides HQ HLS streams with even better quality than pc mediaset but fails
|
||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
||||
'iptv-all',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||
|
||||
_NAMESPACES = (
|
||||
_MEDIASELECTION_NS,
|
||||
_EMP_PLAYLIST_NS,
|
||||
)
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
|
||||
def _findall_ns(self, element, xpath):
|
||||
elements = []
|
||||
for ns in self._NAMESPACES:
|
||||
elements.extend(element.findall(xpath % ns))
|
||||
return elements
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
||||
if error is None:
|
||||
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
||||
if error is not None:
|
||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
||||
return self._findall_ns(media_selection, './{%s}media')
|
||||
error = media_selection.get('result')
|
||||
if error:
|
||||
raise BBCCoUkIE.MediaSelectionError(error)
|
||||
return media_selection.get('media') or []
|
||||
|
||||
def _extract_connections(self, media):
|
||||
return self._findall_ns(media, './{%s}connection')
|
||||
return media.get('connection') or []
|
||||
|
||||
def _get_subtitles(self, media, programme_id):
|
||||
subtitles = {}
|
||||
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||
if not isinstance(captions, compat_etree_Element):
|
||||
continue
|
||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
||||
subtitles[lang] = [
|
||||
subtitles['en'] = [
|
||||
{
|
||||
'url': connection.get('href'),
|
||||
'ext': 'ttml',
|
||||
},
|
||||
]
|
||||
break
|
||||
return subtitles
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
|
||||
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
last_exception = None
|
||||
for mediaselector_url in self._MEDIASELECTOR_URLS:
|
||||
for media_set in self._MEDIA_SETS:
|
||||
try:
|
||||
return self._download_media_selector_url(
|
||||
mediaselector_url % programme_id, programme_id)
|
||||
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||
except BBCCoUkIE.MediaSelectionError as e:
|
||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||
last_exception = e
|
||||
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
media_selection = self._download_json(
|
||||
url, programme_id, 'Downloading media selection JSON',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if kind in ('video', 'audio'):
|
||||
bitrate = int_or_none(media.get('bitrate'))
|
||||
encoding = media.get('encoding')
|
||||
service = media.get('service')
|
||||
width = int_or_none(media.get('width'))
|
||||
height = int_or_none(media.get('height'))
|
||||
file_size = int_or_none(media.get('media_file_size'))
|
||||
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||
supplier = connection.get('supplier')
|
||||
transfer_format = connection.get('transferFormat')
|
||||
format_id = supplier or conn_kind or protocol
|
||||
if service:
|
||||
format_id = '%s_%s' % (service, format_id)
|
||||
# ASX playlist
|
||||
if supplier == 'asx':
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
if re.search(self._USP_RE, href):
|
||||
usp_formats = self._extract_m3u8_formats(
|
||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for f in usp_formats:
|
||||
if f.get('height') and f['height'] > 720:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif transfer_format == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not service and not supplier and bitrate:
|
||||
if not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||
|
||||
error = self._search_regex(
|
||||
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
||||
r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
|
||||
IE_DESC = 'BBC'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||
|
||||
_MEDIASELECTOR_URLS = [
|
||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
||||
# even not geo restricted at all
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
||||
# notukerror for non UK (?) users (e.g.
|
||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
|
||||
# Provides fewer formats, but works everywhere for everybody (hopefully)
|
||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
|
||||
_MEDIA_SETS = [
|
||||
'mobile-tablet-main',
|
||||
'pc',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
|
60
youtube_dl/extractor/bongacams.py
Normal file
60
youtube_dl/extractor/bongacams.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BongaCamsIE(InfoExtractor):
    """Extractor for live BongaCams rooms.

    Room data is requested from the site's ``/tools/amf.php`` endpoint and
    the HLS playlist URL is built from the returned video server URL.
    """

    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://de.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://cn.bongacams.com/azumi-8',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict of the live stream behind *url*."""
        host, channel_id = re.match(self._VALID_URL, url).group('host', 'id')

        room_request = urlencode_postdata((
            ('method', 'getRoomData'),
            ('args[]', channel_id),
            ('args[]', 'false'),
        ))
        amf = self._download_json(
            'https://%s/tools/amf.php' % host, channel_id,
            data=room_request, headers={'X-Requested-With': 'XMLHttpRequest'})

        # The video server URL is mandatory; a missing key raises KeyError.
        server_url = amf['localData']['videoServerUrl']

        def performer_field(key, expected_type=None):
            # Optional lookup inside amf['performerData'].
            return try_get(amf, lambda x: x['performerData'][key], expected_type)

        uploader_id = performer_field('username', compat_str) or channel_id
        uploader = performer_field('displayName', compat_str)
        like_count = int_or_none(performer_field('loversCount'))

        playlist_url = '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id)
        formats = self._extract_m3u8_formats(
            playlist_url, channel_id, 'mp4', m3u8_id='hls', live=True)
        self._sort_formats(formats)

        return {
            'id': channel_id,
            'title': self._live_title(uploader or uploader_id),
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': 18,
            'is_live': True,
            'formats': formats,
        }
|
@@ -28,6 +28,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
@@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
num_drm_sources = 0
|
||||
formats = []
|
||||
for source in json_data.get('sources', []):
|
||||
sources = json_data.get('sources') or []
|
||||
for source in sources:
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
if container == 'WVM' or source.get('key_systems'):
|
||||
num_drm_sources += 1
|
||||
continue
|
||||
elif ext == 'ism':
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if not formats:
|
||||
errors = json_data.get('errors')
|
||||
if errors:
|
||||
error = errors[0]
|
||||
raise ExtractorError(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
if sources and num_drm_sources == len(sources):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -600,24 +601,27 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||
|
||||
def extract_policy_key():
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
% (account_id, player_id, embed), video_id)
|
||||
|
||||
policy_key = None
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||
config = self._download_json(
|
||||
base_url + 'config.json', video_id, fatal=False) or {}
|
||||
policy_key = try_get(
|
||||
config, lambda x: x['video_cloud']['policy_key'])
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
webpage = self._download_webpage(
|
||||
base_url + 'index.min.js', video_id)
|
||||
|
||||
catalog = self._search_regex(
|
||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||
if catalog:
|
||||
catalog = self._parse_json(
|
||||
js_to_json(catalog), video_id, fatal=False)
|
||||
if catalog:
|
||||
policy_key = catalog.get('policyKey')
|
||||
|
||||
if not policy_key:
|
||||
policy_key = self._search_regex(
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
@@ -11,7 +11,47 @@ from ..utils import (
|
||||
|
||||
|
||||
class CBSLocalIE(AnvatoIE):
|
||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mcp_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||
|
||||
|
||||
class CBSLocalArticleIE(AnvatoIE):
|
||||
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Anvato backend
|
||||
@@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||
'info_dict': {
|
||||
'id': '3580809',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
},
|
||||
'categories': [
|
||||
'Stations\\Spoken Word\\WCBSTV',
|
||||
'Syndication\\AOL',
|
||||
'Syndication\\MSN',
|
||||
'Syndication\\NDN',
|
||||
'Syndication\\Yahoo',
|
||||
'Content\\News',
|
||||
'Content\\News\\Local News',
|
||||
],
|
||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE):
|
||||
config['data_src'] % path, page_title, {
|
||||
'default': {
|
||||
'media_src': config['media_src'],
|
||||
}
|
||||
},
|
||||
'f4m': {
|
||||
'host': 'cnn-vh.akamaihd.net',
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
|
@@ -2605,6 +2605,13 @@ class InfoExtractor(object):
|
||||
return entries
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
signed = 'hdnea=' in manifest_url
|
||||
if not signed:
|
||||
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
|
||||
manifest_url = re.sub(
|
||||
r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
|
||||
'', manifest_url).strip('?')
|
||||
|
||||
formats = []
|
||||
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
@@ -2630,7 +2637,7 @@ class InfoExtractor(object):
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
|
||||
if http_host and m3u8_formats and not signed:
|
||||
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
|
52
youtube_dl/extractor/ctv.py
Normal file
52
youtube_dl/extractor/ctv.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
    """Extractor for ctv.ca show and movie pages.

    The page path is resolved through the site's GraphQL endpoint to an
    ``axisId`` and ``videoPlayerDestCode``; these form a ``9c9media:`` URL
    that is returned as a URL result for ``NineCNineMedia``.
    """

    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
        'info_dict': {
            'id': '2102249',
            'ext': 'flv',
            'title': 'Wednesday, December 23, 2020',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
            'timestamp': 1608732000,
            'upload_date': '20201223',
            'series': 'Your Morning',
            'season': '2020-2021',
            'season_number': 5,
            'episode_number': 88,
            'tags': ['Your Morning'],
            'categories': ['Talk Show'],
            'duration': 7467.126,
        },
    }, {
        'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page path and hand the result to the 9c9media extractor."""
        display_id = self._match_id(url)
        graphql_query = '''{
resolvedPath(path: "/%s") {
lastSegment {
content {
... on AxisContent {
axisId
videoPlayerDestCode
}
}
}
}
}''' % display_id
        response = self._download_json(
            'https://www.ctv.ca/space-graphql/graphql', display_id,
            query={'query': graphql_query})
        content = response['data']['resolvedPath']['lastSegment']['content']
        video_id = content['axisId']
        nine_c_nine_url = '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id)
        return self.url_result(nine_c_nine_url, 'NineCNineMedia', video_id)
|
@@ -1,77 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class EveryonesMixtapeIE(InfoExtractor):
    """Extractor for everyonesmixtape.com mixes.

    A URL of the form ``.../#/mix/<code>`` yields the whole mix as a
    playlist; ``.../#/mix/<code>/<n>`` yields only the n-th track (1-based).
    Each track is returned as a plain ``url`` entry so that the matching
    extractor for the track URL performs the real extraction.
    """

    # ``songnr`` accepts one or more digits: a mix can hold more than nine
    # tracks (the playlist test below has 24), and because the pattern is
    # anchored with ``$`` a single-digit group would reject those URLs.
    _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]+))?$'

    _TESTS = [{
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
        'info_dict': {
            'id': '5bfseWNmlds',
            'ext': 'mp4',
            'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
            'uploader': 'FKR.TV',
            'uploader_id': 'frenchkissrecords',
            'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
            'upload_date': '20081015'
        },
        'params': {
            'skip_download': True,  # This is simply YouTube
        }
    }, {
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
        'info_dict': {
            'id': 'm7m0jJAbMQi',
            'title': 'Driving',
        },
        'playlist_count': 24
    }]

    def _download_xhr_json(self, api_url, playlist_id, note):
        """Fetch *api_url* as JSON, sending the XMLHttpRequest marker header."""
        request = sanitized_Request(api_url)
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        return self._download_json(request, playlist_id, note=note)

    def _real_extract(self, url):
        """Return a playlist for the mix, or a single entry if a track number is given.

        Raises:
            ExtractorError: if the mix code is not present in the site's mix
                list, or if the requested track number is outside the mix.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')

        # Step 1: translate the public mix code into the site's numeric id.
        playlist_list = self._download_xhr_json(
            'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id,
            playlist_id, 'Downloading playlist metadata')
        try:
            playlist_no = next(playlist['id']
                               for playlist in playlist_list
                               if playlist['code'] == playlist_id)
        except StopIteration:
            raise ExtractorError('Playlist id not found')

        # Step 2: download the track list of that mix.
        playlist = self._download_xhr_json(
            'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no,
            playlist_id, 'Downloading playlist info')

        entries = [{
            '_type': 'url',
            'url': t['url'],
            'title': t['title'],
        } for t in playlist['tracks']]

        songnr = mobj.group('songnr')
        if songnr:
            index = int(songnr) - 1
            # Reject 0 (which would silently wrap to the last track through
            # negative indexing) and numbers beyond the end of the mix.
            if not 0 <= index < len(entries):
                raise ExtractorError(
                    'Song number %s is out of range (mix has %d tracks)'
                    % (songnr, len(entries)))
            return entries[index]

        playlist_title = playlist['mixData']['name']
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
|
@@ -33,6 +33,8 @@ from .aenetworks import (
|
||||
AENetworksCollectionIE,
|
||||
AENetworksShowIE,
|
||||
HistoryTopicIE,
|
||||
HistoryPlayerIE,
|
||||
BiographyIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
@@ -119,6 +121,7 @@ from .bleacherreport import (
|
||||
from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .bpb import BpbIE
|
||||
@@ -163,7 +166,10 @@ from .cbc import (
|
||||
CBCOlympicsIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
)
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
@@ -241,6 +247,7 @@ from .crunchyroll import (
|
||||
)
|
||||
from .cspan import CSpanIE
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
@@ -327,7 +334,6 @@ from .espn import (
|
||||
)
|
||||
from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
@@ -395,7 +401,6 @@ from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gamespot import GameSpotIE
|
||||
@@ -501,7 +506,6 @@ from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
from .kankan import KankanIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .karrierevideos import KarriereVideosIE
|
||||
@@ -679,9 +683,15 @@ from .nationalgeographic import (
|
||||
NationalGeographicTVIE,
|
||||
)
|
||||
from .naver import NaverIE
|
||||
from .nba import NBAIE
|
||||
from .nba import (
|
||||
NBAWatchEmbedIE,
|
||||
NBAWatchIE,
|
||||
NBAWatchCollectionIE,
|
||||
NBAEmbedIE,
|
||||
NBAIE,
|
||||
NBAChannelIE,
|
||||
)
|
||||
from .nbc import (
|
||||
CSNNEIE,
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
@@ -1123,7 +1133,6 @@ from .tagesschau import (
|
||||
TagesschauIE,
|
||||
)
|
||||
from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
@@ -1416,7 +1425,10 @@ from .vshare import VShareIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
from .vvvvid import VVVVIDIE
|
||||
from .vvvvid import (
|
||||
VVVVIDIE,
|
||||
VVVVIDShowIE,
|
||||
)
|
||||
from .vyborymos import VyboryMosIE
|
||||
from .vzaar import VzaarIE
|
||||
from .wakanim import WakanimIE
|
||||
@@ -1447,7 +1459,10 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wistia import WistiaIE
|
||||
from .wistia import (
|
||||
WistiaIE,
|
||||
WistiaPlaylistIE,
|
||||
)
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import socket
|
||||
|
||||
@@ -8,6 +9,7 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_http_client,
|
||||
compat_str,
|
||||
compat_urllib_error,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
groups/[^/]+/permalink/
|
||||
groups/[^/]+/permalink/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
@@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||
'info_dict': {
|
||||
'id': '211641140192478',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
'graphURI': '/api/graphql/'
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||
|
||||
def extract_relay_prefetched_data(_filter):
|
||||
replay_data = extract_relay_data(_filter)
|
||||
for require in (replay_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||
@@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
graphql_data = self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
|
||||
webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
|
||||
for require in (graphql_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
entries = []
|
||||
data = extract_relay_prefetched_data(
|
||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||
if data:
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
def parse_graphql_video(video):
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||
playable_url = video.get('playable_url' + suffix)
|
||||
if not playable_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'quality': q(format_id),
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
process_formats(formats)
|
||||
v_id = video.get('videoId') or video.get('id') or video_id
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
}
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
title = video.get('name')
|
||||
if title:
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': description,
|
||||
})
|
||||
else:
|
||||
info['title'] = description or 'Facebook video #%s' % v_id
|
||||
entries.append(info)
|
||||
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
def parse_attachment(attachment, key='media'):
|
||||
media = attachment.get(key) or {}
|
||||
if media.get('__typename') == 'Video':
|
||||
return parse_graphql_video(media)
|
||||
|
||||
data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
@@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
if not video_data and '/watchparty/' in url:
|
||||
post_data = {
|
||||
'doc_id': 3731964053542869,
|
||||
'variables': json.dumps({
|
||||
'livingRoomID': video_id,
|
||||
}),
|
||||
}
|
||||
|
||||
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||
if prefetched_data:
|
||||
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||
if lsd:
|
||||
post_data[lsd['name']] = lsd['value']
|
||||
|
||||
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||
for define in (relay_data.get('define') or []):
|
||||
if define[0] == 'RelayAPIConfigDefaults':
|
||||
self._api_config = define[2]
|
||||
|
||||
living_room = self._download_json(
|
||||
urljoin(url, self._api_config['graphURI']), video_id,
|
||||
data=urlencode_postdata(post_data))['data']['living_room']
|
||||
|
||||
entries = []
|
||||
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||
v_id = video.get('id')
|
||||
if not v_id:
|
||||
continue
|
||||
v_id = compat_str(v_id)
|
||||
entries.append(self.url_result(
|
||||
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||
self.ie_key(), v_id, video.get('name')))
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
# Video info not in first request, do a secondary request using
|
||||
# tahoe player specific URL
|
||||
tahoe_data = self._download_webpage(
|
||||
|
@@ -1,77 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FXNetworksIE(AdobePassIE):
    """Extractor for fxnetworks.com / simpsonsworld.com video pages.

    Reads the attributes of the page's thePlatform anchor element and returns
    a ``url_transparent`` result pointing at the thePlatform release URL, so
    that the ``ThePlatform`` extractor resolves the actual formats.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.fxnetworks.com/video/1032565827847',
        'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
        'info_dict': {
            'id': 'dRzwHC_MMqIv',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
            'age_limit': 14,
            'uploader': 'NEWA-FNG-FX',
            'upload_date': '20170825',
            'timestamp': 1503686274,
            'episode_number': 0,
            'season_number': 2,
            'series': 'Better Things',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.simpsonsworld.com/video/716094019682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the ``url_transparent`` info dict for the video at *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        geo_block_notice = 'The content you are trying to access is not available in your region.'
        if geo_block_notice in webpage:
            self.raise_geo_restricted()

        # The anchor whose ``rel`` attribute is a thePlatform link carries
        # all of the metadata as ``data-*`` attributes.
        anchor_html = self._search_regex(
            r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')
        video_data = extract_attributes(anchor_html)
        player_type = self._search_regex(
            r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)

        release_url = video_data['rel']
        title = video_data['data-title']
        rating = video_data.get('data-rating')

        query = {'mbr': 'true'}
        if player_type == 'movies':
            query['manifest'] = 'm3u'
        else:
            query['switch'] = 'http'

        # A ``data-req-auth`` value of '1' means an MVPD auth token must be
        # added to the release URL query.
        needs_auth = video_data.get('data-req-auth') == '1'
        if needs_auth:
            resource = self._get_mvpd_resource(
                video_data['data-channel'], title,
                video_data.get('data-guid'), rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)

        platform_url = smuggle_url(
            update_url_query(release_url, query), {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': platform_url,
            'series': video_data.get('data-show-title'),
            'episode_number': int_or_none(video_data.get('data-episode')),
            'season_number': int_or_none(video_data.get('data-season')),
            'thumbnail': video_data.get('data-large-thumb'),
            'age_limit': parse_age_limit(rating),
            'ie_key': 'ThePlatform',
        }
|
@@ -35,6 +35,7 @@ from ..utils import (
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
@@ -66,7 +67,10 @@ from .tube8 import Tube8IE
|
||||
from .mofosex import MofosexEmbedIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .youporn import YouPornIE
|
||||
from .vimeo import VimeoIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
from .onionstudios import OnionStudiosIE
|
||||
@@ -217,6 +221,33 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
},
|
||||
# RSS feed with item with description and thumbnails
|
||||
{
|
||||
'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'info_dict': {
|
||||
'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||
'title': 're:.*100% Hydrogen.*',
|
||||
'description': 're:.*In this episode.*',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'm4a',
|
||||
'id': 'c1c879525ce2cb640b344507e682c36d',
|
||||
'title': 're:Hydrogen!',
|
||||
'description': 're:.*In this episode we are going.*',
|
||||
'timestamp': 1567977776,
|
||||
'upload_date': '20190908',
|
||||
'duration': 459,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'episode_number': 1,
|
||||
'season_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosures and unsupported link URLs
|
||||
{
|
||||
'url': 'http://www.hellointernet.fm/podcast?format=rss',
|
||||
@@ -1996,22 +2027,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [SpringboardPlatformIE.ie_key()],
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
|
||||
'info_dict': {
|
||||
'id': 'uPDB5I9wfp8',
|
||||
'ext': 'webm',
|
||||
'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
|
||||
'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
|
||||
'upload_date': '20160219',
|
||||
'uploader': 'Pocoyo - Português (BR)',
|
||||
'uploader_id': 'PocoyoBrazil',
|
||||
},
|
||||
'add_ie': [YoutubeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
|
||||
'info_dict': {
|
||||
@@ -2181,7 +2196,18 @@ class GenericIE(InfoExtractor):
|
||||
# 'params': {
|
||||
# 'force_generic_extractor': True,
|
||||
# },
|
||||
# }
|
||||
# },
|
||||
{
|
||||
# VHX Embed
|
||||
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
|
||||
'info_dict': {
|
||||
'id': '858208',
|
||||
'ext': 'mp4',
|
||||
'title': 'Untitled',
|
||||
'uploader_id': 'user80538407',
|
||||
'uploader': 'OTT Videos',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2218,10 +2244,10 @@ class GenericIE(InfoExtractor):
|
||||
default=None)
|
||||
|
||||
duration = itunes('duration')
|
||||
explicit = itunes('explicit')
|
||||
if explicit == 'true':
|
||||
explicit = (itunes('explicit') or '').lower()
|
||||
if explicit in ('true', 'yes'):
|
||||
age_limit = 18
|
||||
elif explicit == 'false':
|
||||
elif explicit in ('false', 'no'):
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = None
|
||||
@@ -2234,7 +2260,7 @@ class GenericIE(InfoExtractor):
|
||||
'timestamp': unified_timestamp(
|
||||
xpath_text(it, 'pubDate', default=None)),
|
||||
'duration': int_or_none(duration) or parse_duration(duration),
|
||||
'thumbnail': url_or_none(itunes('image')),
|
||||
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
|
||||
'episode': itunes('title'),
|
||||
'episode_number': int_or_none(itunes('episode')),
|
||||
'season_number': int_or_none(itunes('season')),
|
||||
@@ -2559,6 +2585,10 @@ class GenericIE(InfoExtractor):
|
||||
if vimeo_urls:
|
||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||
|
||||
vhx_url = VHXEmbedIE._extract_url(webpage)
|
||||
if vhx_url:
|
||||
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
|
@@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'resource_id': 'Disney',
|
||||
}
|
||||
},
|
||||
'fxnow.fxnetworks': {
|
||||
'brand': '025',
|
||||
'requestor_id': 'dtci',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?P<sub_domain>%s)\.)?go|
|
||||
(?P<sub_domain_2>abc|freeform|disneynow)
|
||||
(?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
@@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||
'info_dict': {
|
||||
'id': 'VDKA12782841',
|
||||
'ext': 'mp4',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
|
@@ -22,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
|
||||
'timestamp': 1371748545,
|
||||
'upload_date': '20130620',
|
||||
'uploader_id': 'naomipq',
|
||||
'uploader': 'Naomi Leonor Phan-Quang',
|
||||
'uploader': 'B E A U T Y F O R A S H E S',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
@@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
(media, video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
width) = [None] * 12
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -137,59 +140,77 @@ class InstagramIE(InfoExtractor):
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||
if not media:
|
||||
additional_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'additional data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
if additional_data:
|
||||
media = try_get(
|
||||
additional_data, lambda x: x['graphql']['shortcode_media'],
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
width = int_or_none(media.get('dimensions', {}).get('width'))
|
||||
description = try_get(
|
||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||
compat_str) or media.get('caption')
|
||||
thumbnail = media.get('display_src') or media.get('display_url')
|
||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||
uploader = media.get('owner', {}).get('full_name')
|
||||
uploader_id = media.get('owner', {}).get('username')
|
||||
|
||||
def get_count(key, kind):
|
||||
return int_or_none(try_get(
|
||||
def get_count(keys, kind):
|
||||
if not isinstance(keys, (list, tuple)):
|
||||
keys = [keys]
|
||||
for key in keys:
|
||||
count = int_or_none(try_get(
|
||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||
lambda x: x['%ss' % kind]['count'])))
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count('to_comment', 'comment')
|
||||
if count is not None:
|
||||
return count
|
||||
like_count = get_count('preview_like', 'like')
|
||||
comment_count = get_count(
|
||||
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
comments = [{
|
||||
'author': comment.get('user', {}).get('username'),
|
||||
'author_id': comment.get('user', {}).get('id'),
|
||||
'id': comment.get('id'),
|
||||
'text': comment.get('text'),
|
||||
'timestamp': int_or_none(comment.get('created_at')),
|
||||
} for comment in media.get(
|
||||
'comments', {}).get('nodes', []) if comment.get('text')]
|
||||
if not video_url:
|
||||
edges = try_get(
|
||||
media, lambda x: x['edge_sidecar_to_children']['edges'],
|
||||
list) or []
|
||||
if edges:
|
||||
entries = []
|
||||
for edge_num, edge in enumerate(edges, start=1):
|
||||
node = try_get(edge, lambda x: x['node'], dict)
|
||||
if not node:
|
||||
continue
|
||||
node_video_url = url_or_none(node.get('video_url'))
|
||||
if not node_video_url:
|
||||
continue
|
||||
entries.append({
|
||||
'id': node.get('shortcode') or node['id'],
|
||||
'title': 'Video %d' % edge_num,
|
||||
'url': node_video_url,
|
||||
'thumbnail': node.get('display_url'),
|
||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||
'view_count': int_or_none(node.get('video_view_count')),
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, video_id,
|
||||
'Post by %s' % uploader_id if uploader_id else None,
|
||||
description)
|
||||
|
||||
if not video_url:
|
||||
video_url = self._og_search_video_url(webpage, secure=False)
|
||||
|
@@ -1,97 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
srt_subtitles_timecode,
|
||||
)
|
||||
|
||||
|
||||
class KanalPlayIE(InfoExtractor):
    """Extractor for kanal5play.se, kanal9play.se and kanal11play.se videos."""
    IE_DESC = 'Kanal 5/9/11 Play'
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
        'info_dict': {
            'id': '3270012277',
            'ext': 'flv',
            'title': 'Saknar både dusch och avlopp',
            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
            'duration': 2636.36,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
        'only_matching': True,
    }, {
        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
        'only_matching': True,
    }]

    def _fix_subtitles(self, subs):
        """Render the site's subtitle items (millisecond offsets) as SRT text."""
        cues = []
        for index, cue in enumerate(subs, 1):
            begin = srt_subtitles_timecode(cue['startMillis'] / 1000.0)
            finish = srt_subtitles_timecode(cue['endMillis'] / 1000.0)
            cues.append(
                '%s\r\n%s --> %s\r\n%s' % (index, begin, finish, cue['text']))
        return '\r\n\r\n'.join(cues)

    def _get_subtitles(self, channel_id, video_id):
        """Download the subtitle JSON (non-fatal) and expose it as Swedish SRT."""
        subs = self._download_json(
            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
            video_id, 'Downloading subtitles JSON', fatal=False)
        if not subs:
            return {}
        return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}

    def _real_extract(self, url):
        """Query the getVideo API and build the RTMP format list."""
        video_id, channel_id = re.match(
            self._VALID_URL, url).group('id', 'channel_id')

        video = self._download_json(
            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
            video_id)

        # The API lists the reasons when it refuses to hand out streams.
        refusal = video.get('reasonsForNoStreams')
        if refusal:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(refusal)),
                expected=True)

        title = video['title']
        rtmp_base = video['streamBaseUrl']

        formats = []
        for stream in video['streams']:
            formats.append({
                'url': rtmp_base,
                'play_path': stream['source'],
                'ext': 'flv',
                'tbr': float_or_none(stream.get('bitrate'), 1000),
                'rtmp_real_time': True,
            })
        self._sort_formats(formats)

        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)
        else:
            subtitles = {}

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('posterUrl'),
            'duration': float_or_none(video.get('length'), 1000),
            'formats': formats,
            'subtitles': subtitles,
        }
|
@@ -1,15 +1,14 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .telecinco import TelecincoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class MiTeleIE(InfoExtractor):
|
||||
class MiTeleIE(TelecincoIE):
|
||||
IE_DESC = 'mitele.es'
|
||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||
|
||||
@@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'timestamp': 1471209401,
|
||||
'upload_date': '20160814',
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
# no explicit title
|
||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||
@@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||
'only_matching': True,
|
||||
@@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor):
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video = pre_player['video']
|
||||
video_id = video['dataMediaId']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
info = content.get('info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
# for some reason only HLS is supported
|
||||
'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
|
||||
'id': video_id,
|
||||
video_info.update({
|
||||
'title': title,
|
||||
'description': info.get('synopsis'),
|
||||
'series': content.get('title'),
|
||||
@@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor):
|
||||
'episode': content.get('subtitle'),
|
||||
'episode_number': int_or_none(info.get('episode_number')),
|
||||
'duration': int_or_none(info.get('duration')),
|
||||
'thumbnail': video.get('dataPoster'),
|
||||
'age_limit': int_or_none(info.get('rating')),
|
||||
'timestamp': parse_iso8601(pre_player.get('publishedTime')),
|
||||
}
|
||||
})
|
||||
return video_info
|
||||
|
@@ -5,33 +5,137 @@ import re
|
||||
|
||||
from .turner import TurnerBaseIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
OnDemandPagedList,
|
||||
remove_start,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class NBAIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
||||
class NBACVPBaseIE(TurnerBaseIE):
    """Base class adding an NBA-specific wrapper around the Turner CVP lookup."""

    def _extract_nba_cvp_info(self, path, video_id, fatal=False):
        """Fetch CVP info for `path` on secure.nba.com with the NBA media hosts."""
        cvp_url = 'http://secure.nba.com/%s' % path
        media_hosts = {
            'default': {
                'media_src': 'http://nba.cdn.turner.com/nba/big',
            },
            'm3u8': {
                'media_src': 'http://nbavod-f.akamaihd.net',
            },
        }
        return self._extract_cvp_info(
            cvp_url, video_id, media_hosts, fatal=fatal)
|
||||
|
||||
|
||||
class NBAWatchBaseIE(NBACVPBaseIE):
    """Shared program lookup and format extraction for the NBA "watch" extractors."""
    _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'

    def _extract_video(self, filter_key, filter_value):
        """Return the info dict of the first program matching the filter.

        filter_key is the search field to match against and filter_value
        the value it must have; filter_value is also used as the id shown
        while downloading the search result.

        Raises KeyError/IndexError if the search response has no matching
        document, or if the document lacks 'pid' or 'name'.
        """
        video = self._download_json(
            'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
            filter_value, query={
                'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
                'q': filter_key + ':' + filter_value,
                'wt': 'json',
            })['response']['docs'][0]

        # compat_str (not str) keeps the id a text string on Python 2 too,
        # consistent with NBABaseIE._extract_video.
        video_id = compat_str(video['pid'])
        title = video['name']

        formats = []
        # The publish point is requested with an iPhone User-Agent; a failed
        # request is tolerated and simply yields no HLS formats.
        m3u8_url = (self._download_json(
            'https://watch.nba.com/service/publishpoint', video_id, query={
                'type': 'video',
                'format': 'json',
                'id': video_id,
            }, headers={
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
            }, fatal=False) or {}).get('path')
        if m3u8_url:
            # Drop the "_pc"/"_iphone" device suffix from the manifest name.
            m3u8_formats = self._extract_m3u8_formats(
                re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(m3u8_formats)
            # Offer every HLS variant a second time as a plain HTTP format,
            # derived by removing ".m3u8" from the variant URL.
            for f in m3u8_formats:
                http_f = f.copy()
                http_f.update({
                    'format_id': http_f['format_id'].replace('hls-', 'http-'),
                    'protocol': 'http',
                    'url': http_f['url'].replace('.m3u8', ''),
                })
                formats.append(http_f)

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
            'description': video.get('description'),
            'duration': int_or_none(video.get('runtime')),
            'timestamp': parse_iso8601(video.get('releaseDate')),
            'tags': video.get('tags'),
        }

        # Only SEO names containing a YYYY/MM/DD/ path are looked up in CVP.
        seo_name = video.get('seoName')
        if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
            base_path = ''
            if seo_name.startswith('teams/'):
                # "teams/<team>/..." lives below "<team>/video/".
                base_path += seo_name.split('/')[1] + '/'
            base_path += 'video/'
            cvp_info = self._extract_nba_cvp_info(
                base_path + seo_name + '.xml', video_id, False)
            if cvp_info:
                formats.extend(cvp_info['formats'])
                info = merge_dicts(info, cvp_info)

        self._sort_formats(formats)
        info['formats'] = formats
        return info
|
||||
|
||||
|
||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
    """Extractor for watch.nba.com embed URLs that carry a numeric `id`."""
    # Spelled IE_NAME like NBAWatchIE and NBAWatchCollectionIE; the former
    # "IENAME" spelling is not used by any other class in this file.
    IE_NAME = 'nba:watch:embed'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://watch.nba.com/embed?id=659395',
        'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
        'info_dict': {
            'id': '659395',
            'ext': 'mp4',
            'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'timestamp': 1492228800,
            'upload_date': '20170415',
        },
    }]

    def _real_extract(self, url):
        """Look the program up by the `pid` taken from the URL."""
        video_id = self._match_id(url)
        return self._extract_video('pid', video_id)
|
||||
|
||||
|
||||
class NBAWatchIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
||||
'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
|
||||
'info_dict': {
|
||||
'id': '0021200253-okc-bkn-recap',
|
||||
'id': '70946',
|
||||
'ext': 'mp4',
|
||||
'title': 'Thunder vs. Nets',
|
||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||
'duration': 181,
|
||||
'timestamp': 1354638466,
|
||||
'timestamp': 1354597200,
|
||||
'upload_date': '20121204',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||
'only_matching': True,
|
||||
@@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE):
|
||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||
'info_dict': {
|
||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||
'id': '330865',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hawks vs. Cavaliers Game 1',
|
||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||
'duration': 228,
|
||||
'timestamp': 1432134543,
|
||||
'upload_date': '20150520',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
||||
'info_dict': {
|
||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
||||
'ext': 'mp4',
|
||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
||||
'upload_date': '20160216',
|
||||
'timestamp': 1455672000,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'timberwolves',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'params': {
|
||||
# Download the whole playlist takes too long time
|
||||
'playlist_items': '1-30',
|
||||
'timestamp': 1432094400,
|
||||
'upload_date': '20150521',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
||||
'info_dict': {
|
||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
||||
'upload_date': '20141212',
|
||||
'timestamp': 1418418600,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# only CVP mp4 format available
|
||||
'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 30
|
||||
def _real_extract(self, url):
    """Extract a single video, or hand over to the collection extractor.

    A `collection` query parameter turns the URL into a playlist request
    unless --no-playlist is in effect.
    """
    display_id = self._match_id(url)
    query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
    collection_id = query.get('collection', [None])[0]
    if collection_id and not self._downloader.params.get('noplaylist'):
        self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
        return self.url_result(
            'https://www.nba.com/watch/list/collection/' + collection_id,
            NBAWatchCollectionIE.ie_key(), collection_id)
    if collection_id:
        self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
    return self._extract_video('seoName', display_id)
|
||||
|
||||
def _fetch_page(self, team, video_id, page):
    """Yield URL results for one page of a team's video search listing."""
    params = {
        'type': 'teamvideo',
        'start': page * self._PAGE_SIZE + 1,
        'npp': (page + 1) * self._PAGE_SIZE + 1,
        'sort': 'recent',
        'output': 'json',
        'site': team,
    }
    search_url = (
        'http://searchapp2.nba.com/nba-search/query.jsp?'
        + compat_urllib_parse_urlencode(params))
    response = self._download_json(
        search_url, video_id, note='Download page %d of playlist data' % page)
    # The hits are the first element of the "results" list.
    for hit in response['results'][0]:
        yield self.url_result(
            compat_urlparse.urljoin('http://www.nba.com/', hit['url']))
|
||||
|
||||
def _extract_playlist(self, orig_path, video_id, webpage):
|
||||
team = orig_path.split('/')[0]
|
||||
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||
IE_NAME = 'nba:watch:collection'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.nba.com/list/collection/season-preview-2020',
|
||||
'info_dict': {
|
||||
'id': 'season-preview-2020',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
if self._downloader.params.get('noplaylist'):
|
||||
self.to_screen('Downloading just video because of --no-playlist')
|
||||
video_path = self._search_regex(
|
||||
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
|
||||
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
|
||||
return self.url_result(video_url)
|
||||
|
||||
self.to_screen('Downloading playlist - add --no-playlist to just download video')
|
||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, team, video_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, team, playlist_title)
|
||||
def _fetch_page(self, collection_id, page):
    """Yield URL entries for one page of a collection (page is 0-based)."""
    page_num = page + 1  # the API counts pages from 1
    listing = self._download_json(
        'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
        collection_id, 'Downloading page %d JSON metadata' % page_num,
        query={
            'count': self._PAGE_SIZE,
            'page': page_num,
        })
    for video in listing['results']['videos']:
        program = video.get('program') or {}
        seo_name = program.get('seoName') or program.get('slug')
        if seo_name:
            yield {
                '_type': 'url',
                'id': program.get('id'),
                'title': program.get('title') or video.get('title'),
                'url': 'https://www.nba.com/watch/video/' + seo_name,
                'thumbnail': video.get('image'),
                'description': program.get('description') or video.get('description'),
                'duration': parse_duration(program.get('runtimeHours')),
                'timestamp': parse_iso8601(video.get('releaseDate')),
            }
|
||||
|
||||
def _real_extract(self, url):
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
orig_path = path
|
||||
if path.startswith('nba/'):
|
||||
path = path[3:]
|
||||
collection_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, collection_id),
|
||||
self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, collection_id)
|
||||
|
||||
if 'video/' not in path:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
||||
|
||||
if path == '{{id}}':
|
||||
return self._extract_playlist(orig_path, video_id, webpage)
|
||||
class NBABaseIE(NBACVPBaseIE):
|
||||
_VALID_URL_BASE = r'''(?x)
|
||||
https?://(?:www\.)?nba\.com/
|
||||
(?P<team>
|
||||
blazers|
|
||||
bucks|
|
||||
bulls|
|
||||
cavaliers|
|
||||
celtics|
|
||||
clippers|
|
||||
grizzlies|
|
||||
hawks|
|
||||
heat|
|
||||
hornets|
|
||||
jazz|
|
||||
kings|
|
||||
knicks|
|
||||
lakers|
|
||||
magic|
|
||||
mavericks|
|
||||
nets|
|
||||
nuggets|
|
||||
pacers|
|
||||
pelicans|
|
||||
pistons|
|
||||
raptors|
|
||||
rockets|
|
||||
sixers|
|
||||
spurs|
|
||||
suns|
|
||||
thunder|
|
||||
timberwolves|
|
||||
warriors|
|
||||
wizards
|
||||
)
|
||||
(?:/play\#)?/'''
|
||||
_CHANNEL_PATH_REGEX = r'video/channel|series'
|
||||
|
||||
# See prepareContentId() of pkgCvp.js
|
||||
if path.startswith('video/teams'):
|
||||
path = 'video/channels/proxy/' + path[6:]
|
||||
def _embed_url_result(self, team, content_id):
    """Return a URL result pointing at the embed iframe for this content."""
    embed_url = update_url_query(
        'https://secure.nba.com/assets/amp/include/video/iframe.html',
        {'contentId': content_id, 'team': team})
    return self.url_result(embed_url, NBAEmbedIE.ie_key())
|
||||
|
||||
return self._extract_cvp_info(
|
||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
'm3u8': {
|
||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||
},
|
||||
def _call_api(self, team, content_id, query, resource):
    """Query the team's api.nba.net video endpoint and return its result list."""
    api_url = 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team
    note = 'Download %s JSON metadata' % resource
    auth_headers = {
        'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
    }
    response = self._download_json(
        api_url, content_id, note, query=query, headers=auth_headers)
    return response['response']['result']
|
||||
|
||||
def _extract_video(self, video, team, extract_all=True):
    """Build an info dict from one API video record.

    video is the record dict; it must carry 'nid' and at least one of
    'title', 'headline' or 'shortHeadline'. team is used when the record
    has no 'brand' of its own. With extract_all true, the source, HLS and
    CVP formats are resolved here; otherwise the entry becomes a URL
    result for the embed page (which requires 'videoId' in the record).
    """
    video_id = compat_str(video['nid'])
    # Prefer the brand stored on the record, but fall back to the caller's
    # team instead of raising KeyError on records without one.
    team = video.get('brand') or team

    info = {
        'id': video_id,
        'title': video.get('title') or video.get('headline') or video['shortHeadline'],
        'description': video.get('description'),
        'timestamp': parse_iso8601(video.get('published')),
    }

    # All sidecar caption URLs are filed under English.
    subtitles = {}
    captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
    for caption_url in captions.values():
        subtitles.setdefault('en', []).append({'url': caption_url})

    formats = []
    mp4_url = video.get('mp4')
    if mp4_url:
        formats.append({
            'url': mp4_url,
        })

    if extract_all:
        source_url = video.get('videoSource')
        # s3:// locations are skipped; other source URLs are probed first.
        if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
            formats.append({
                'format_id': 'source',
                'url': source_url,
                'preference': 1,
            })

        m3u8_url = video.get('m3u8')
        if m3u8_url:
            if '.akamaihd.net/i/' in m3u8_url:
                formats.extend(self._extract_akamai_formats(
                    m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
            else:
                formats.extend(self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False))

        content_xml = video.get('contentXml')
        if team and content_xml:
            cvp_info = self._extract_nba_cvp_info(
                team + content_xml, video_id, fatal=False)
            if cvp_info:
                formats.extend(cvp_info['formats'])
                subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
                info = merge_dicts(info, cvp_info)

        self._sort_formats(formats)
    else:
        info.update(self._embed_url_result(team, video['videoId']))

    info.update({
        'formats': formats,
        'subtitles': subtitles,
    })

    return info
|
||||
|
||||
def _real_extract(self, url):
    """Resolve the content id and delegate to `_extract_url_results`.

    "/play#/" URLs carry the URL-quoted content id themselves; for any
    other URL it is read from the downloaded page.
    """
    team, display_id = re.match(self._VALID_URL, url).groups()
    if '/play#/' not in url:
        webpage = self._download_webpage(url, display_id)
        content_id = self._search_regex(
            self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', webpage, 'video id')
    else:
        content_id = compat_urllib_parse_unquote(display_id)
    return self._extract_url_results(team, content_id)
|
||||
|
||||
|
||||
class NBAEmbedIE(NBABaseIE):
    """Extractor for the secure.nba.com iframe/topIframe embed player."""
    # Spelled IE_NAME like NBAWatchIE and NBAWatchCollectionIE in this file
    # (was the misspelling "IENAME").
    IE_NAME = 'nba:embed'
    _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
        'only_matching': True,
    }, {
        'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract via the team API, or defer to NBAWatchIE without a team."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        content_id = qs['contentId'][0]
        team = qs.get('team', [None])[0]
        if not team:
            # Without a team the content id is treated as a watch.nba.com path.
            return self.url_result(
                'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
        video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
        return self._extract_video(video, team)
|
||||
|
||||
|
||||
class NBAIE(NBABaseIE):
    """Extractor for single videos on nba.com team sites."""
    # Spelled IE_NAME like NBAWatchIE and NBAWatchCollectionIE in this file
    # (was the misspelling "IENAME").
    IE_NAME = 'nba'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
        'info_dict': {
            'id': '45039',
            'ext': 'mp4',
            'title': 'AND WE BACK.',
            'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
            'duration': 94,
            'timestamp': 1607112000,
            'upload_date': '20201218',
        },
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
        'only_matching': True,
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
        'only_matching': True,
    }]
    # Page key whose quoted value holds the content id.
    _CONTENT_ID_REGEX = r'videoID'

    def _extract_url_results(self, team, content_id):
        """Delegate a single video to the embed extractor."""
        return self._embed_url_result(team, content_id)
|
||||
|
||||
|
||||
class NBAChannelIE(NBABaseIE):
    """Extractor for channel / series listings on nba.com team sites."""
    # Spelled IE_NAME like NBAWatchIE and NBAWatchCollectionIE in this file
    # (was the misspelling "IENAME").
    IE_NAME = 'nba:channel'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/blazers/video/channel/summer_league',
        'info_dict': {
            'title': 'Summer League',
        },
        'playlist_mincount': 138,
    }, {
        'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
        'only_matching': True,
    }]
    # Page key whose quoted value holds the channel name.
    _CONTENT_ID_REGEX = r'videoSubCategory'
    _PAGE_SIZE = 100

    def _fetch_page(self, team, channel, page):
        """Yield entries for one 0-based page of the channel listing."""
        results = self._call_api(team, channel, {
            'channels': channel,
            'count': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
        }, 'page %d' % (page + 1))
        for video in results:
            # extract_all=False: each entry is resolved later via the embed URL.
            yield self._extract_video(video, team, False)

    def _extract_url_results(self, team, content_id):
        """Return a lazily paged playlist titled after the channel."""
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, team, content_id),
            self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_title=content_id)
|
||||
|
@@ -158,7 +158,8 @@ class NBCIE(AdobePassIE):
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||
@@ -174,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
iframe_m = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||
r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
|
||||
if iframe_m:
|
||||
return iframe_m.group('url')
|
||||
|
||||
@@ -192,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
# Does not include https because its certificate is invalid
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# iframe src
|
||||
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||
'info_dict': {
|
||||
'id': 'PHJSaFWbrTY9',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'upload_date': '20150330',
|
||||
'timestamp': 1427726529,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# data-mpx-src
|
||||
'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data-src
|
||||
'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -274,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE):
|
||||
}
|
||||
|
||||
|
||||
class CSNNEIE(InfoExtractor):
    """Extractor for csnne.com video pages; playback is delegated to ThePlatform."""
    _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'

    _TEST = {
        'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
        'info_dict': {
            'id': 'yvBLLUgQ8WU0',
            'ext': 'mp4',
            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
            'timestamp': 1459369979,
            'upload_date': '20160330',
            'uploader': 'NBCU-SPORTS',
        }
    }

    def _real_extract(self, url):
        """Return a transparent URL result for the page's twitter:player:stream."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        stream_url = self._html_search_meta('twitter:player:stream', page)
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': stream_url,
            'display_id': display_id,
        }
|
||||
|
||||
|
||||
class NBCNewsIE(ThePlatformIE):
|
||||
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||
|
||||
|
@@ -90,7 +90,7 @@ class NhkVodIE(NhkBaseIE):
|
||||
_TESTS = [{
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||
'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
|
||||
'info_dict': {
|
||||
'id': 'a95j5iza',
|
||||
'ext': 'mp4',
|
||||
|
@@ -1,20 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import datetime
|
||||
import functools
|
||||
import json
|
||||
import math
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
InAdvancePagedList,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
@@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor):
|
||||
if urlh is False:
|
||||
login_ok = False
|
||||
else:
|
||||
parts = compat_urlparse.urlparse(urlh.geturl())
|
||||
parts = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||
login_ok = False
|
||||
if not login_ok:
|
||||
@@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor):
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
|
||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
||||
flv_info = compat_parse_qs(flv_info_webpage)
|
||||
if 'url' not in flv_info:
|
||||
if 'deleted' in flv_info:
|
||||
raise ExtractorError('The video has been deleted.',
|
||||
@@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor):
|
||||
|
||||
|
||||
class NiconicoPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||
'info_dict': {
|
||||
'id': '27411728',
|
||||
'title': 'AKB48のオールナイトニッポン',
|
||||
'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
|
||||
'uploader': 'のっく',
|
||||
'uploader_id': '805442',
|
||||
},
|
||||
'playlist_mincount': 225,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _call_api(self, list_id, resource, query):
|
||||
return self._download_json(
|
||||
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||
'Downloading %s JSON metatdata' % resource, query=query,
|
||||
headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||
|
||||
def _parse_owner(self, item):
|
||||
owner = item.get('owner') or {}
|
||||
if owner:
|
||||
return {
|
||||
'uploader': owner.get('name'),
|
||||
'uploader_id': owner.get('id'),
|
||||
}
|
||||
return {}
|
||||
|
||||
def _fetch_page(self, list_id, page):
|
||||
page += 1
|
||||
items = self._call_api(list_id, 'page %d' % page, {
|
||||
'page': page,
|
||||
'pageSize': self._PAGE_SIZE,
|
||||
})['items']
|
||||
for item in items:
|
||||
video = item.get('video') or {}
|
||||
video_id = video.get('id')
|
||||
if not video_id:
|
||||
continue
|
||||
count = video.get('count') or {}
|
||||
get_count = lambda x: int_or_none(count.get(x))
|
||||
info = {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||
'description': video.get('shortDescription'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'view_count': get_count('view'),
|
||||
'comment_count': get_count('comment'),
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
}
|
||||
info.update(self._parse_owner(video))
|
||||
yield info
|
||||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
|
||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
||||
webpage, 'entries')
|
||||
entries = json.loads(entries_json)
|
||||
entries = [{
|
||||
'_type': 'url',
|
||||
'ie_key': NiconicoIE.ie_key(),
|
||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
||||
entry['item_data']['video_id']),
|
||||
} for entry in entries]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
|
||||
'id': list_id,
|
||||
'entries': entries,
|
||||
}
|
||||
mylist = self._call_api(list_id, 'list', {
|
||||
'pageSize': 1,
|
||||
})
|
||||
entries = InAdvancePagedList(
|
||||
functools.partial(self._fetch_page, list_id),
|
||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||
self._PAGE_SIZE)
|
||||
result = self.playlist_result(
|
||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||
result.update(self._parse_owner(mylist))
|
||||
return result
|
||||
|
@@ -5,10 +5,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'$include': '[HasClosedCaptions]',
|
||||
})
|
||||
|
||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
|
||||
if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
manifest_base_url = content_package_url + 'manifest.'
|
||||
@@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for image in content.get('Images', []):
|
||||
for image in (content.get('Images') or []):
|
||||
image_url = image.get('Url')
|
||||
if not image_url:
|
||||
continue
|
||||
@@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
continue
|
||||
container.append(e_name)
|
||||
|
||||
season = content.get('Season', {})
|
||||
season = content.get('Season') or {}
|
||||
|
||||
info = {
|
||||
'id': content_id,
|
||||
@@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
|
||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||
'episode_number': int_or_none(content.get('Episode')),
|
||||
'season': season.get('Name'),
|
||||
'season_number': season.get('Number'),
|
||||
'season_number': int_or_none(season.get('Number')),
|
||||
'season_id': season.get('Id'),
|
||||
'series': content.get('Media', {}).get('Name'),
|
||||
'series': try_get(content, lambda x: x['Media']['Name']),
|
||||
'tags': tags,
|
||||
'categories': categories,
|
||||
'duration': float_or_none(content_package.get('Duration')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
if content_package.get('HasClosedCaptions'):
|
||||
|
@@ -33,8 +33,7 @@ class NRKBaseIE(InfoExtractor):
|
||||
|
||||
def _extract_nrk_formats(self, asset_url, video_id):
|
||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||
return self._extract_akamai_formats(
|
||||
re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id)
|
||||
return self._extract_akamai_formats(asset_url, video_id)
|
||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
|
@@ -6,16 +6,33 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class PikselIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:
|
||||
(?:
|
||||
player\.
|
||||
(?:
|
||||
olympusattelecom|
|
||||
vibebyvista
|
||||
)|
|
||||
(?:api|player)\.multicastmedia|
|
||||
(?:api-ovp|player)\.piksel
|
||||
)\.com|
|
||||
(?:
|
||||
mz-edge\.stream\.co|
|
||||
movie-s\.nhk\.or
|
||||
)\.jp|
|
||||
vidego\.baltimorecity\.gov
|
||||
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://player.piksel.com/v/ums2867l',
|
||||
@@ -56,46 +73,41 @@ class PikselIE(InfoExtractor):
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _call_api(self, app_token, resource, display_id, query, fatal=True):
|
||||
response = (self._download_json(
|
||||
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
|
||||
display_id, query=query, fatal=fatal) or {}).get('response')
|
||||
failure = try_get(response, lambda x: x['failure']['reason'])
|
||||
if failure:
|
||||
if fatal:
|
||||
raise ExtractorError(failure, expected=True)
|
||||
self.report_warning(failure)
|
||||
return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
ref_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
|
||||
webpage, 'program uuid', default=display_id)
|
||||
app_token = self._search_regex([
|
||||
r'clientAPI\s*:\s*"([^"]+)"',
|
||||
r'data-de-api-key\s*=\s*"([^"]+)"'
|
||||
], webpage, 'app token')
|
||||
response = self._download_json(
|
||||
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
|
||||
video_id, query={
|
||||
'v': video_id
|
||||
})['response']
|
||||
failure = response.get('failure')
|
||||
if failure:
|
||||
raise ExtractorError(response['failure']['reason'], expected=True)
|
||||
video_data = response['WsProgramResponse']['program']['asset']
|
||||
query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
|
||||
program = self._call_api(
|
||||
app_token, 'program', display_id, query)['WsProgramResponse']['program']
|
||||
video_id = program['uuid']
|
||||
video_data = program['asset']
|
||||
title = video_data['title']
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
|
||||
for asset_file in video_data.get('assetFiles', []):
|
||||
def process_asset_file(asset_file):
|
||||
if not asset_file:
|
||||
return
|
||||
# TODO: extract rtmp formats
|
||||
http_url = asset_file.get('http_url')
|
||||
if not http_url:
|
||||
continue
|
||||
return
|
||||
tbr = None
|
||||
vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
|
||||
abr = int_or_none(asset_file.get('audioBitrate'), 1024)
|
||||
@@ -118,6 +130,43 @@ class PikselIE(InfoExtractor):
|
||||
'filesize': int_or_none(asset_file.get('filesize')),
|
||||
'tbr': tbr,
|
||||
})
|
||||
|
||||
def process_asset_files(asset_files):
|
||||
for asset_file in (asset_files or []):
|
||||
process_asset_file(asset_file)
|
||||
|
||||
process_asset_files(video_data.get('assetFiles'))
|
||||
process_asset_file(video_data.get('referenceFile'))
|
||||
if not formats:
|
||||
asset_id = video_data.get('assetid') or program.get('assetid')
|
||||
if asset_id:
|
||||
process_asset_files(try_get(self._call_api(
|
||||
app_token, 'asset_file', display_id, {
|
||||
'assetid': asset_id,
|
||||
}, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
|
||||
|
||||
m3u8_url = dict_get(video_data, [
|
||||
'm3u8iPadURL',
|
||||
'ipadM3u8Url',
|
||||
'm3u8AndroidURL',
|
||||
'm3u8iPhoneURL',
|
||||
'iphoneM3u8Url'])
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
|
||||
if smil_url:
|
||||
transform_source = None
|
||||
if ref_id == 'nhkworld':
|
||||
# TODO: figure out if this is something to be fixed in urljoin,
|
||||
# _parse_smil_formats or keep it here
|
||||
transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
|
||||
formats.extend(self._extract_smil_formats(
|
||||
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
|
||||
transform_source=transform_source, fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@@ -288,14 +288,24 @@ class PornHubIE(PornHubBaseIE):
|
||||
video_urls.append((v_url, None))
|
||||
video_urls_set.add(v_url)
|
||||
|
||||
def parse_quality_items(quality_items):
|
||||
q_items = self._parse_json(quality_items, video_id, fatal=False)
|
||||
if not isinstance(q_items, list):
|
||||
return
|
||||
for item in q_items:
|
||||
if isinstance(item, dict):
|
||||
add_video_url(item.get('url'))
|
||||
|
||||
if not video_urls:
|
||||
FORMAT_PREFIXES = ('media', 'quality')
|
||||
FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
|
||||
js_vars = extract_js_vars(
|
||||
webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
|
||||
default=None)
|
||||
if js_vars:
|
||||
for key, format_url in js_vars.items():
|
||||
if any(key.startswith(p) for p in FORMAT_PREFIXES):
|
||||
if key.startswith(FORMAT_PREFIXES[-1]):
|
||||
parse_quality_items(format_url)
|
||||
elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
|
||||
add_video_url(format_url)
|
||||
if not video_urls and re.search(
|
||||
r'<[^>]+\bid=["\']lockedPlayer', webpage):
|
||||
@@ -351,12 +361,16 @@ class PornHubIE(PornHubBaseIE):
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', default=None)
|
||||
|
||||
def extract_vote_count(kind, name):
|
||||
return self._extract_count(
|
||||
(r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
|
||||
r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
|
||||
webpage, name)
|
||||
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
|
||||
like_count = extract_vote_count('Up', 'like')
|
||||
dislike_count = extract_vote_count('Down', 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -56,7 +57,8 @@ class RedditRIE(InfoExtractor):
|
||||
'id': 'zv89llsvexdz',
|
||||
'ext': 'mp4',
|
||||
'title': 'That small heart attack.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'thumbnails': 'count:4',
|
||||
'timestamp': 1501941939,
|
||||
'upload_date': '20170805',
|
||||
'uploader': 'Antw87',
|
||||
@@ -118,11 +120,34 @@ class RedditRIE(InfoExtractor):
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
thumbnails = []
|
||||
|
||||
def add_thumbnail(src):
|
||||
if not isinstance(src, dict):
|
||||
return
|
||||
thumbnail_url = url_or_none(src.get('url'))
|
||||
if not thumbnail_url:
|
||||
return
|
||||
thumbnails.append({
|
||||
'url': unescapeHTML(thumbnail_url),
|
||||
'width': int_or_none(src.get('width')),
|
||||
'height': int_or_none(src.get('height')),
|
||||
})
|
||||
|
||||
for image in try_get(data, lambda x: x['preview']['images']) or []:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
add_thumbnail(image.get('source'))
|
||||
resolutions = image.get('resolutions')
|
||||
if isinstance(resolutions, list):
|
||||
for resolution in resolutions:
|
||||
add_thumbnail(resolution)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'title': data.get('title'),
|
||||
'thumbnail': url_or_none(data.get('thumbnail')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': float_or_none(data.get('created_utc')),
|
||||
'uploader': data.get('author'),
|
||||
'duration': int_or_none(try_get(
|
||||
|
@@ -4,8 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -41,16 +45,22 @@ class SevenPlusIE(BrightcoveNewIE):
|
||||
def _real_extract(self, url):
|
||||
path, episode_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
try:
|
||||
media = self._download_json(
|
||||
'https://videoservice.swm.digital/playback', episode_id, query={
|
||||
'appId': '7plus',
|
||||
'deviceType': 'web',
|
||||
'platformType': 'web',
|
||||
'accountId': 5303576322001,
|
||||
'referenceId': 'ref:' + episode_id,
|
||||
'deliveryId': 'csai',
|
||||
'videoType': 'vod',
|
||||
})['media']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), episode_id)[0]['error_code'], expected=True)
|
||||
raise
|
||||
|
||||
for source in media.get('sources', {}):
|
||||
src = source.get('src')
|
||||
|
@@ -1,40 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SonyLIVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
|
||||
'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
|
||||
'info_dict': {
|
||||
'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
|
||||
'id': 'ref:5024612095001',
|
||||
'title': 'Bachelors Delight - Achaari Cheese Toast',
|
||||
'id': '1000022678',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20170923',
|
||||
'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
|
||||
'uploader_id': '5182475815001',
|
||||
'timestamp': 1506200547,
|
||||
'upload_date': '20200411',
|
||||
'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
|
||||
'timestamp': 1586632091,
|
||||
'duration': 185,
|
||||
'season_number': 1,
|
||||
'episode': 'Achaari Cheese Toast',
|
||||
'episode_number': 1,
|
||||
'release_year': 2016,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
|
||||
'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_TOKEN = None
|
||||
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
|
||||
def _call_api(self, version, path, video_id):
|
||||
headers = {}
|
||||
if self._TOKEN:
|
||||
headers['security_token'] = self._TOKEN
|
||||
try:
|
||||
return self._download_json(
|
||||
'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
|
||||
video_id, headers=headers)['resultObj']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
message = self._parse_json(
|
||||
e.cause.read().decode(), video_id)['message']
|
||||
if message == 'Geoblocked Country':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(message)
|
||||
raise
|
||||
|
||||
def _real_initialize(self):
|
||||
self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
brightcove_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
|
||||
'geo_countries': ['IN'],
|
||||
'referrer': url,
|
||||
}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
video_id = self._match_id(url)
|
||||
content = self._call_api(
|
||||
'1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
|
||||
if content.get('isEncrypted'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
dash_url = content['videoURL']
|
||||
headers = {
|
||||
'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
|
||||
}
|
||||
formats = self._extract_mpd_formats(
|
||||
dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
|
||||
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._call_api(
|
||||
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
|
||||
title = metadata['title']
|
||||
episode = metadata.get('episodeTitle')
|
||||
if episode and title != episode:
|
||||
title += ' - ' + episode
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('posterURL'),
|
||||
'description': metadata.get('longDescription') or metadata.get('shortDescription'),
|
||||
'timestamp': int_or_none(metadata.get('creationDate'), 1000),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'season_number': int_or_none(metadata.get('season')),
|
||||
'episode': episode,
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'release_year': int_or_none(metadata.get('year')),
|
||||
}
|
||||
|
@@ -7,17 +7,24 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class SpankBangIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:[^/]+\.)?spankbang\.com/
|
||||
(?:
|
||||
(?P<id>[\da-z]+)/(?:video|play|embed)\b|
|
||||
[\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
|
||||
'md5': '1cc433e1d6aa14bc376535b8679302f7',
|
||||
@@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2y3td/embed/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id') or mobj.group('id_2')
|
||||
webpage = self._download_webpage(
|
||||
url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
|
||||
video_id, headers={'Cookie': 'country=US'})
|
||||
@@ -155,30 +166,33 @@ class SpankBangIE(InfoExtractor):
|
||||
|
||||
|
||||
class SpankBangPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
|
||||
_TEST = {
|
||||
'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
|
||||
'info_dict': {
|
||||
'id': 'ug0k',
|
||||
'title': 'Big Ass Titties',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_mincount': 40,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
|
||||
|
||||
entries = [self.url_result(
|
||||
'https://spankbang.com/%s/video' % video_id,
|
||||
ie=SpankBangIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
|
||||
urljoin(url, mobj.group('path')),
|
||||
ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
|
||||
% re.escape(display_id), webpage)]
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
|
||||
r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
|
||||
fatal=False)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title)
|
||||
|
@@ -3,50 +3,62 @@ from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'md5': '74bf14128578d1e040c3ebc82088f45f',
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
|
||||
'info_dict': {
|
||||
'id': '9dexnwtmh8_X',
|
||||
'id': 'bm0foJFaTKqb',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Cowboy Adventure',
|
||||
'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
|
||||
'timestamp': 1437758640,
|
||||
'upload_date': '20150724',
|
||||
'uploader': 'NBCU-SPROUT-NEW',
|
||||
}
|
||||
}
|
||||
'title': 'Robot Bike Race',
|
||||
'description': 'md5:436b1d97117cc437f54c383f4debc66d',
|
||||
'timestamp': 1606148940,
|
||||
'upload_date': '20201123',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.universalkids.com/watch/robot-bike-race',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_component = self._search_regex(
|
||||
r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
|
||||
webpage, 'video component', default=None)
|
||||
if video_component:
|
||||
options = self._parse_json(extract_attributes(
|
||||
video_component)['data-options'], video_id)
|
||||
theplatform_url = options['video']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if options.get('protected'):
|
||||
query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
theplatform_url, query), {'force_smil_url': True})
|
||||
else:
|
||||
iframe = self._search_regex(
|
||||
r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
|
||||
webpage, 'iframe')
|
||||
theplatform_url = extract_attributes(iframe)['src']
|
||||
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
display_id = self._match_id(url)
|
||||
mpx_metadata = self._download_json(
|
||||
# http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
|
||||
'https://www.universalkids.com/_api/videos/' + display_id,
|
||||
display_id)['mpxMetadata']
|
||||
media_pid = mpx_metadata['mediaPid']
|
||||
theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if mpx_metadata.get('entitlement') == 'auth':
|
||||
query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(
|
||||
update_url_query(theplatform_url, query), {
|
||||
'force_smil_url': True,
|
||||
'geo_countries': self._GEO_COUNTRIES,
|
||||
})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_pid,
|
||||
'url': theplatform_url,
|
||||
'series': mpx_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
|
||||
'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@@ -2,25 +2,40 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class StreetVoiceIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://streetvoice.com/skippylu/songs/94440/',
|
||||
'md5': '15974627fc01a29e492c98593c2fd472',
|
||||
'url': 'https://streetvoice.com/skippylu/songs/123688/',
|
||||
'md5': '0eb535970629a5195685355f3ed60bfd',
|
||||
'info_dict': {
|
||||
'id': '94440',
|
||||
'id': '123688',
|
||||
'ext': 'mp3',
|
||||
'title': '輸',
|
||||
'description': 'Crispy脆樂團 - 輸',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 260,
|
||||
'upload_date': '20091018',
|
||||
'title': '流浪',
|
||||
'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 270,
|
||||
'upload_date': '20100923',
|
||||
'uploader': 'Crispy脆樂團',
|
||||
'uploader_id': '627810',
|
||||
'uploader_url': 're:^https?://streetvoice.com/skippylu/',
|
||||
'timestamp': 1285261661,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'track': '流浪',
|
||||
'track_id': '123688',
|
||||
'album': '2010',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
|
||||
@@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
song_id = self._match_id(url)
|
||||
|
||||
song = self._download_json(
|
||||
'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
|
||||
|
||||
base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
|
||||
song = self._download_json(base_url, song_id, query={
|
||||
'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
|
||||
})
|
||||
title = song['name']
|
||||
author = song['user']['nickname']
|
||||
|
||||
formats = []
|
||||
for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
|
||||
f_url = (self._download_json(
|
||||
base_url + suffix + '/', song_id,
|
||||
'Downloading %s format URL' % format_id,
|
||||
data=b'', fatal=False) or {}).get('file')
|
||||
if not f_url:
|
||||
continue
|
||||
f = {
|
||||
'ext': 'mp3',
|
||||
'format_id': format_id,
|
||||
'url': f_url,
|
||||
'vcodec': 'none',
|
||||
}
|
||||
if format_id == 'hls':
|
||||
f['protocol'] = 'm3u8_native'
|
||||
abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
|
||||
if abr:
|
||||
abr = int(abr)
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
user = song.get('user') or {}
|
||||
username = user.get('username')
|
||||
get_count = lambda x: int_or_none(song.get(x + '_count'))
|
||||
|
||||
return {
|
||||
'id': song_id,
|
||||
'url': song['file'],
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': '%s - %s' % (author, title),
|
||||
'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
|
||||
'duration': song.get('length'),
|
||||
'upload_date': unified_strdate(song.get('created_at')),
|
||||
'uploader': author,
|
||||
'uploader_id': compat_str(song['user']['id']),
|
||||
'description': strip_or_none(song.get('synopsis')),
|
||||
'thumbnail': song.get('image'),
|
||||
'duration': int_or_none(song.get('length')),
|
||||
'timestamp': parse_iso8601(song.get('created_at')),
|
||||
'uploader': try_get(user, lambda x: x['profile']['nickname']),
|
||||
'uploader_id': str_or_none(user.get('id')),
|
||||
'uploader_url': urljoin(url, '/%s/' % username) if username else None,
|
||||
'view_count': get_count('plays'),
|
||||
'like_count': get_count('likes'),
|
||||
'comment_count': get_count('comments'),
|
||||
'repost_count': get_count('share'),
|
||||
'track': title,
|
||||
'track_id': song_id,
|
||||
'album': try_get(song, lambda x: x['album']['name']),
|
||||
}
|
||||
|
@@ -1,43 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
|
||||
|
||||
class TastyTradeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017',
|
||||
'info_dict': {
|
||||
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
|
||||
'ext': 'mp4',
|
||||
'title': 'A History of Teaming',
|
||||
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
|
||||
'duration': 422.255,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
}, {
|
||||
'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, fatal=False)
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': OoyalaIE.ie_key(),
|
||||
'url': 'ooyala:%s' % ooyala_code,
|
||||
'display_id': display_id,
|
||||
})
|
||||
return info
|
@@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE):
|
||||
@staticmethod
|
||||
def _is_teachable(webpage):
|
||||
return 'teachableTracker.linker:autoLink' in webpage and re.search(
|
||||
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
|
||||
r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
|
||||
webpage)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -5,14 +5,11 @@ import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||
'md5': '7ee56d665cfd241c0e6d80fd175068b0',
|
||||
'info_dict': {
|
||||
'id': 'JEA5ijCnF6p5W08A1rNKn7',
|
||||
'ext': 'mp4',
|
||||
@@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor):
|
||||
}]
|
||||
}, {
|
||||
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
|
||||
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
|
||||
'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
|
||||
'info_dict': {
|
||||
'id': 'jn24Od1zGLG4XUZcnUnZB6',
|
||||
'ext': 'mp4',
|
||||
@@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
|
||||
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
|
||||
'md5': 'eddb50291df704ce23c74821b995bcac',
|
||||
'info_dict': {
|
||||
'id': 'aywerkD2Sv1vGNqq9b85Q2',
|
||||
'ext': 'mp4',
|
||||
@@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor):
|
||||
|
||||
def _parse_content(self, content, url):
|
||||
video_id = content['dataMediaId']
|
||||
if content.get('dataCmsId') == 'ooyala':
|
||||
return self.url_result(
|
||||
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
|
||||
config_url = urljoin(url, content['dataConfig'])
|
||||
config = self._download_json(
|
||||
config_url, video_id, 'Downloading config JSON')
|
||||
content['dataConfig'], video_id, 'Downloading config JSON')
|
||||
title = config['info']['title']
|
||||
|
||||
def mmc_url(mmc_type):
|
||||
return re.sub(
|
||||
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
|
||||
config['services']['mmc'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for mmc_type in ('flash', 'html5'):
|
||||
mmc = self._download_json(
|
||||
mmc_url(mmc_type), video_id,
|
||||
'Downloading %s mmc JSON' % mmc_type, fatal=False)
|
||||
if not mmc:
|
||||
continue
|
||||
if not duration:
|
||||
duration = int_or_none(mmc.get('duration'))
|
||||
for location in mmc['locations']:
|
||||
gat = self._proto_relative_url(location.get('gat'), 'http:')
|
||||
gcp = location.get('gcp')
|
||||
ogn = location.get('ogn')
|
||||
if None in (gat, gcp, ogn):
|
||||
continue
|
||||
token_data = {
|
||||
'gcp': gcp,
|
||||
'ogn': ogn,
|
||||
'sta': 0,
|
||||
}
|
||||
media = self._download_json(
|
||||
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'Referer': url,
|
||||
}, fatal=False) or {}
|
||||
stream = media.get('stream') or media.get('file')
|
||||
if not stream:
|
||||
continue
|
||||
ext = determine_ext(stream)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
services = config['services']
|
||||
caronte = self._download_json(services['caronte'], video_id)
|
||||
stream = caronte['dls'][0]['stream']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'application/json;charset=UTF-8',
|
||||
'Origin': re.match(r'https?://[^/]+', url).group(0),
|
||||
})
|
||||
cdn = self._download_json(
|
||||
caronte['cerbero'], video_id, data=json.dumps({
|
||||
'bbx': caronte['bbx'],
|
||||
'gbx': self._download_json(services['gbx'], video_id)['gbx'],
|
||||
}).encode(), headers=headers)['tokens']['1']['cdn']
|
||||
formats = self._extract_m3u8_formats(
|
||||
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor):
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
|
||||
'duration': duration,
|
||||
'duration': int_or_none(content.get('dataDuration')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
# smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,14 +25,16 @@ class TenPlayIE(InfoExtractor):
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
# 'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
|
||||
_GEO_BYPASS = False
|
||||
_FASTLY_URL_TEMPL = 'https://10-selector.global.ssl.fastly.net/s/kYEXFC/media/%s?mbr=true&manifest=m3u&format=redirect'
|
||||
|
||||
def _real_extract(self, url):
|
||||
content_id = self._match_id(url)
|
||||
@@ -40,19 +43,28 @@ class TenPlayIE(InfoExtractor):
|
||||
video = data.get('video') or {}
|
||||
metadata = data.get('metaData') or {}
|
||||
brightcove_id = video.get('videoId') or metadata['showContentVideoId']
|
||||
brightcove_url = smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
{'geo_countries': ['AU']})
|
||||
# brightcove_url = smuggle_url(
|
||||
# self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||
# {'geo_countries': ['AU']})
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
self._FASTLY_URL_TEMPL % brightcove_id), brightcove_id).geturl()
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
formats = self._extract_m3u8_formats(m3u8_url, brightcove_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': brightcove_url,
|
||||
'id': content_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
|
||||
# '_type': 'url_transparent',
|
||||
# 'url': brightcove_url,
|
||||
'formats': formats,
|
||||
'id': brightcove_id,
|
||||
'title': video.get('title') or metadata.get('pageContentName') or metadata['showContentName'],
|
||||
'description': video.get('description'),
|
||||
'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
|
||||
'series': metadata.get('showName'),
|
||||
'season': metadata.get('showContentSeason'),
|
||||
'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
'thumbnail': video.get('poster'),
|
||||
'uploader_id': '2199827728001',
|
||||
# 'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
@@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
provider_id = mobj.group('provider_id')
|
||||
|
@@ -1,18 +1,22 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class TheWeatherChannelIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
|
||||
'md5': 'ab924ac9574e79689c24c6b95e957def',
|
||||
'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
|
||||
'info_dict': {
|
||||
'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
|
||||
'ext': 'mp4',
|
||||
@@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
|
||||
'uploader': 'TWC - Digital (No Distro)',
|
||||
'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
|
||||
'upload_date': '20160720',
|
||||
'timestamp': 1469018835,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
drupal_settings = self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings'), display_id)
|
||||
video_id = drupal_settings['twc']['contexts']['node']['uuid']
|
||||
video_data = self._download_json(
|
||||
'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
|
||||
asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
|
||||
if not locale:
|
||||
locale = 'en-US'
|
||||
video_data = list(self._download_json(
|
||||
'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
|
||||
'name': 'getCMSAssetsUrlConfig',
|
||||
'params': {
|
||||
'language': locale.replace('-', '_'),
|
||||
'query': {
|
||||
'assetName': {
|
||||
'$in': asset_name,
|
||||
},
|
||||
},
|
||||
}
|
||||
}]).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
})['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
|
||||
video_id = video_data['id']
|
||||
seo_meta = video_data.get('seometa', {})
|
||||
title = video_data.get('title') or seo_meta['title']
|
||||
|
||||
@@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
cc_url = video_data.get('cc_url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
@@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):
|
||||
'duration': parse_duration(video_data.get('duration')),
|
||||
'uploader': video_data.get('providername'),
|
||||
'uploader_id': video_data.get('providerid'),
|
||||
'timestamp': parse_iso8601(video_data.get('publishdate')),
|
||||
'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor):
|
||||
|
||||
class MeWatchIE(InfoExtractor):
|
||||
IE_NAME = 'mewatch'
|
||||
_VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
|
||||
'info_dict': {
|
||||
@@ -214,6 +214,15 @@ class MeWatchIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': 'm3u8 download',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
fix_xml_ampersands,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
determine_ext,
|
||||
@@ -49,8 +50,13 @@ class TurnerBaseIE(AdobePassIE):
|
||||
self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
|
||||
return video_url + '?hdnea=' + token
|
||||
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}):
|
||||
video_data = self._download_xml(data_src, video_id)
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
|
||||
video_data = self._download_xml(
|
||||
data_src, video_id,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
fatal=fatal)
|
||||
if not video_data:
|
||||
return {}
|
||||
video_id = video_data.attrib['id']
|
||||
title = xpath_text(video_data, 'headline', fatal=True)
|
||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||
@@ -63,12 +69,14 @@ class TurnerBaseIE(AdobePassIE):
|
||||
|
||||
urls = []
|
||||
formats = []
|
||||
thumbnails = []
|
||||
subtitles = {}
|
||||
rex = re.compile(
|
||||
r'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
|
||||
# Possible formats locations: files/file, files/groupFiles/files
|
||||
# and maybe others
|
||||
for video_file in video_data.findall('.//file'):
|
||||
video_url = video_file.text.strip()
|
||||
video_url = url_or_none(video_file.text.strip())
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
@@ -108,9 +116,28 @@ class TurnerBaseIE(AdobePassIE):
|
||||
continue
|
||||
urls.append(video_url)
|
||||
format_id = video_file.get('bitrate')
|
||||
if ext == 'smil':
|
||||
if ext in ('scc', 'srt', 'vtt'):
|
||||
subtitles.setdefault('en', []).append({
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'png':
|
||||
thumbnails.append({
|
||||
'id': format_id,
|
||||
'url': video_url,
|
||||
})
|
||||
elif ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
video_url, video_id, fatal=False))
|
||||
elif re.match(r'https?://[^/]+\.akamaihd\.net/[iz]/', video_url):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
video_url, video_id, {
|
||||
'hds': path_data.get('f4m', {}).get('host'),
|
||||
# nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
|
||||
# ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
|
||||
# ssl.cdn.turner.com
|
||||
'http': 'pmd.cdn.turner.com',
|
||||
}))
|
||||
elif ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4',
|
||||
@@ -129,7 +156,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
}
|
||||
mobj = rex.search(format_id + video_url)
|
||||
mobj = rex.search(video_url)
|
||||
if mobj:
|
||||
f.update({
|
||||
'width': int(mobj.group('width')),
|
||||
@@ -152,7 +179,6 @@ class TurnerBaseIE(AdobePassIE):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for source in video_data.findall('closedCaptions/source'):
|
||||
for track in source.findall('track'):
|
||||
track_url = url_or_none(track.get('url'))
|
||||
@@ -168,12 +194,12 @@ class TurnerBaseIE(AdobePassIE):
|
||||
}.get(source.get('format'))
|
||||
})
|
||||
|
||||
thumbnails = [{
|
||||
'id': image.get('cut'),
|
||||
thumbnails.extend({
|
||||
'id': image.get('cut') or image.get('name'),
|
||||
'url': image.text,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data.findall('images/image')]
|
||||
} for image in video_data.findall('images/image'))
|
||||
|
||||
is_live = xpath_text(video_data, 'isLive') == 'true'
|
||||
|
||||
|
@@ -5,10 +5,9 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class UKTVPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/.+?\?.*?\bvideo=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://uktvplay\.uktv\.co\.uk/(?:.+?\?.*?\bvideo=|([^/]+/)*watch-online/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/world-at-war/c/200/watch-online/?video=2117008346001',
|
||||
'md5': '',
|
||||
'info_dict': {
|
||||
'id': '2117008346001',
|
||||
'ext': 'mp4',
|
||||
@@ -23,7 +22,11 @@ class UKTVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest']
|
||||
}
|
||||
}, {
|
||||
'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001',
|
||||
'only_matching': True,
|
||||
}]
|
||||
# BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/1242911124001/OrCyvJ2gyL_default/index.html?videoId=%s'
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1242911124001/H1xnMOqP_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vhx:embed'
|
||||
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
|
||||
return unescapeHTML(mobj.group(1)) if mobj else None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
@@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
|
||||
'ott data'), video_id, js_to_json)['config_url']
|
||||
config = self._download_json(config_url, video_id)
|
||||
info = self._parse_config(config, video_id)
|
||||
info['id'] = video_id
|
||||
self._vimeo_sort_formats(info['formats'])
|
||||
return info
|
||||
|
@@ -12,7 +12,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class VVVVIDIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
|
||||
_VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
# video_type == 'video/vvvvid'
|
||||
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
|
||||
@@ -21,6 +22,16 @@ class VVVVIDIE(InfoExtractor):
|
||||
'id': '489048',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ping Pong',
|
||||
'duration': 239,
|
||||
'series': '"Perché dovrei guardarlo?" di Dario Moccia',
|
||||
'season_id': '437',
|
||||
'season_number': 1,
|
||||
'episode': 'Ping Pong',
|
||||
'episode_number': 1,
|
||||
'episode_id': '3334',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -37,6 +48,9 @@ class VVVVIDIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
||||
'only_matching': True
|
||||
}]
|
||||
_conn_id = None
|
||||
|
||||
@@ -45,20 +59,36 @@ class VVVVIDIE(InfoExtractor):
|
||||
'https://www.vvvvid.it/user/login',
|
||||
None, headers=self.geo_verification_headers())['data']['conn_id']
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
def _download_info(self, show_id, path, video_id, fatal=True):
|
||||
response = self._download_json(
|
||||
'https://www.vvvvid.it/vvvvid/ondemand/%s/season/%s' % (show_id, season_id),
|
||||
'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
|
||||
video_id, headers=self.geo_verification_headers(), query={
|
||||
'conn_id': self._conn_id,
|
||||
})
|
||||
if response['result'] == 'error':
|
||||
}, fatal=fatal)
|
||||
if not (response or fatal):
|
||||
return
|
||||
if response.get('result') == 'error':
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, response['message']), expected=True)
|
||||
return response['data']
|
||||
|
||||
def _extract_common_video_info(self, video_data):
|
||||
return {
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'episode_number': int_or_none(video_data.get('number')),
|
||||
'episode_id': str_or_none(video_data.get('id')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
response = self._download_info(
|
||||
show_id, 'season/%s' % season_id, video_id)
|
||||
|
||||
vid = int(video_id)
|
||||
video_data = list(filter(
|
||||
lambda episode: episode.get('video_id') == vid, response['data']))[0]
|
||||
lambda episode: episode.get('video_id') == vid, response))[0]
|
||||
title = video_data['title']
|
||||
formats = []
|
||||
|
||||
# vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
|
||||
@@ -141,18 +171,67 @@ class VVVVIDIE(InfoExtractor):
|
||||
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
info = self._extract_common_video_info(video_data)
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': video_data['title'],
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
'series': video_data.get('show_title'),
|
||||
'season_id': season_id,
|
||||
'season_number': video_data.get('season_number'),
|
||||
'episode_id': str_or_none(video_data.get('id')),
|
||||
'episode_number': int_or_none(video_data.get('number')),
|
||||
'episode_title': video_data['title'],
|
||||
'episode': title,
|
||||
'view_count': int_or_none(video_data.get('views')),
|
||||
'like_count': int_or_none(video_data.get('video_likes')),
|
||||
}
|
||||
'repost_count': int_or_none(video_data.get('video_shares')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class VVVVIDShowIE(VVVVIDIE):
|
||||
_VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.vvvvid.it/show/156/psyco-pass',
|
||||
'info_dict': {
|
||||
'id': '156',
|
||||
'title': 'Psycho-Pass',
|
||||
'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
|
||||
},
|
||||
'playlist_count': 46,
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/156',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
seasons = self._download_info(
|
||||
show_id, 'seasons/', show_title)
|
||||
|
||||
show_info = self._download_info(
|
||||
show_id, 'info/', show_title, fatal=False)
|
||||
|
||||
entries = []
|
||||
for season in (seasons or []):
|
||||
season_number = int_or_none(season.get('number'))
|
||||
episodes = season.get('episodes') or []
|
||||
for episode in episodes:
|
||||
season_id = str_or_none(episode.get('season_id'))
|
||||
video_id = str_or_none(episode.get('video_id'))
|
||||
if not (season_id and video_id):
|
||||
continue
|
||||
info = self._extract_common_video_info(episode)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'ie_key': VVVVIDIE.ie_key(),
|
||||
'url': '/'.join([base_url, season_id, video_id]),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'season_number': season_number,
|
||||
'season_id': season_id,
|
||||
})
|
||||
entries.append(info)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, show_id, show_info.get('title'), show_info.get('description'))
|
||||
|
@@ -5,79 +5,34 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class WistiaIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
|
||||
class WistiaBaseIE(InfoExtractor):
|
||||
_VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
|
||||
_VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
|
||||
_EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'md5': 'cafeb56ec0c53c18c97405eecb3133df',
|
||||
'info_dict': {
|
||||
'id': 'sh7fpupwlt',
|
||||
'ext': 'mov',
|
||||
'title': 'Being Resourceful',
|
||||
'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
|
||||
'upload_date': '20131204',
|
||||
'timestamp': 1386185018,
|
||||
'duration': 117,
|
||||
},
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# https://wistia.com/support/embed-and-share/video-on-your-website
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = WistiaIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
for match in re.finditer(
|
||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||
urls.append(unescapeHTML(match.group('url')))
|
||||
for match in re.finditer(
|
||||
r'''(?sx)
|
||||
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
|
||||
''', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data_json = self._download_json(
|
||||
self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
|
||||
# Some videos require this.
|
||||
headers={
|
||||
'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
|
||||
def _download_embed_config(self, config_type, config_id, referer):
|
||||
base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
|
||||
embed_config = self._download_json(
|
||||
base_url + '.json', config_id, headers={
|
||||
'Referer': referer if referer.startswith('http') else base_url, # Some videos require this.
|
||||
})
|
||||
|
||||
if data_json.get('error'):
|
||||
if isinstance(embed_config, dict) and embed_config.get('error'):
|
||||
raise ExtractorError(
|
||||
'Error while getting the playlist', expected=True)
|
||||
|
||||
data = data_json['media']
|
||||
return embed_config
|
||||
|
||||
def _extract_media(self, embed_config):
|
||||
data = embed_config['media']
|
||||
video_id = data['hashedId']
|
||||
title = data['name']
|
||||
|
||||
formats = []
|
||||
@@ -160,3 +115,85 @@ class WistiaIE(InfoExtractor):
|
||||
'timestamp': int_or_none(data.get('createdAt')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class WistiaIE(WistiaBaseIE):
|
||||
_VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
|
||||
|
||||
_TESTS = [{
|
||||
# with hls video
|
||||
'url': 'wistia:807fafadvk',
|
||||
'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
|
||||
'info_dict': {
|
||||
'id': '807fafadvk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Drip Brennan Dunn Workshop',
|
||||
'description': 'a JV Webinars video',
|
||||
'upload_date': '20160518',
|
||||
'timestamp': 1463607249,
|
||||
'duration': 4987.11,
|
||||
},
|
||||
}, {
|
||||
'url': 'wistia:sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# https://wistia.com/support/embed-and-share/video-on-your-website
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = WistiaIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
for match in re.finditer(
|
||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||
urls.append(unescapeHTML(match.group('url')))
|
||||
for match in re.finditer(
|
||||
r'''(?sx)
|
||||
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
|
||||
''', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
|
||||
urls.append('wistia:%s' % match.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
embed_config = self._download_embed_config('media', video_id, url)
|
||||
return self._extract_media(embed_config)
|
||||
|
||||
|
||||
class WistiaPlaylistIE(WistiaBaseIE):
    # Handles Wistia playlist embed URLs (".../playlists/<id>").
    _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)

    _TEST = {
        'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
        'info_dict': {
            'id': 'aodt9etokc',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Download the playlist embed config and return one entry per media that has an embed config."""
        playlist_id = self._match_id(url)
        playlist = self._download_embed_config('playlist', playlist_id, url)

        medias = try_get(playlist, lambda x: x[0]['medias']) or []
        # Lazy pipeline: each media is inspected and, if usable, extracted
        # before the next one is looked at.
        configs = (media.get('embed_config') for media in medias)
        entries = [self._extract_media(config) for config in configs if config]

        return self.playlist_result(entries, playlist_id)
|
||||
|
@@ -1,23 +1,43 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class YandexDiskIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://yadi\.sk/[di]/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
yadi\.sk|
|
||||
disk\.yandex\.
|
||||
(?:
|
||||
az|
|
||||
by|
|
||||
co(?:m(?:\.(?:am|ge|tr))?|\.il)|
|
||||
ee|
|
||||
fr|
|
||||
k[gz]|
|
||||
l[tv]|
|
||||
md|
|
||||
t[jm]|
|
||||
u[az]|
|
||||
ru
|
||||
)
|
||||
)/(?:[di]/|public.*?\bhash=)(?P<id>[^/?#&]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://yadi.sk/i/VdOeDou8eZs6Y',
|
||||
'md5': '33955d7ae052f15853dc41f35f17581c',
|
||||
'md5': 'a4a8d52958c8fddcf9845935070402ae',
|
||||
'info_dict': {
|
||||
'id': 'VdOeDou8eZs6Y',
|
||||
'ext': 'mp4',
|
||||
@@ -27,92 +47,101 @@ class YandexDiskIE(InfoExtractor):
|
||||
'uploader_id': '300043621',
|
||||
'view_count': int,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
'url': 'https://yadi.sk/d/h3WAXvDS3Li3Ce',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a public Yandex.Disk resource.

    Metadata comes from the ``store-prefetch`` JSON embedded in the public
    page.  The direct download link is requested from the cloud API; when
    that link or the stream list is missing and the page exposes both an
    ``sk`` token and a ``yandexuid``, the page's ``/public/api/`` endpoints
    are queried as a fallback.

    Returns an info dict with ``id``, ``title``, ``duration``, ``uploader``,
    ``uploader_id``, ``view_count`` and ``formats``.
    """
    domain, video_id = re.match(self._VALID_URL, url).groups()

    webpage = self._download_webpage(url, video_id)
    store = self._parse_json(self._search_regex(
        r'<script[^>]+id="store-prefetch"[^>]*>\s*({.+?})\s*</script>',
        webpage, 'store'), video_id)
    resource = store['resources'][store['rootResourceId']]

    title = resource['name']
    meta = resource.get('meta') or {}

    # Prefer the ID from the resource's short URL when one is advertised
    public_url = meta.get('short_url')
    if public_url:
        video_id = self._match_id(public_url)

    source_url = (self._download_json(
        'https://cloud-api.yandex.net/v1/disk/public/resources/download',
        video_id, query={'public_key': url}, fatal=False) or {}).get('href')
    video_streams = resource.get('videoStreams') or {}
    video_hash = resource.get('hash') or url
    environment = store.get('environment') or {}
    sk = environment.get('sk')
    yandexuid = environment.get('yandexuid')
    if sk and yandexuid and not (source_url and video_streams):
        self._set_cookie(domain, 'yandexuid', yandexuid)

        def call_api(action):
            # Non-fatal: any failure degrades to an empty dict
            return (self._download_json(
                urljoin(url, '/public/api/') + action, video_id, data=json.dumps({
                    'hash': video_hash,
                    'sk': sk,
                }).encode(), headers={
                    'Content-Type': 'text/plain',
                }, fatal=False) or {}).get('data') or {}
        if not source_url:
            # TODO: figure out how to detect if download limit has
            # been reached and then avoid unnecessary source format
            # extraction requests
            source_url = call_api('download-url').get('url')
        if not video_streams:
            video_streams = call_api('get-video-streams')

    formats = []
    if source_url:
        formats.append({
            'url': source_url,
            'format_id': 'source',
            'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'),
            'quality': 1,
            'filesize': int_or_none(meta.get('size'))
        })

    for video in (video_streams.get('videos') or []):
        format_url = video.get('url')
        if not format_url:
            continue
        if video.get('dimension') == 'adaptive':
            formats.extend(self._extract_m3u8_formats(
                format_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        else:
            size = video.get('size') or {}
            height = int_or_none(size.get('height'))
            format_id = 'hls'
            if height:
                format_id += '-%dp' % height
            formats.append({
                'ext': 'mp4',
                'format_id': format_id,
                'height': height,
                'protocol': 'm3u8_native',
                'url': format_url,
                'width': int_or_none(size.get('width')),
            })
    self._sort_formats(formats)

    uid = resource.get('uid')
    display_name = try_get(store, lambda x: x['users'][uid]['displayName'])

    return {
        'id': video_id,
        'title': title,
        'duration': float_or_none(video_streams.get('duration'), 1000),
        'uploader': display_name,
        'uploader_id': uid,
        'view_count': int_or_none(meta.get('views_counter')),
        'formats': formats,
    }
|
||||
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -13,26 +14,30 @@ class YandexVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
|
||||
yandex\.ru(?:/(?:portal/(?:video|efir)|efir))?/?\?.*?stream_id=|
|
||||
frontend\.vh\.yandex\.ru/player/
|
||||
)
|
||||
(?P<id>[\da-f]+)
|
||||
(?P<id>(?:[\da-f]{32}|[\w-]{12}))
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
'md5': '33955d7ae052f15853dc41f35f17581c',
|
||||
'url': 'https://yandex.ru/portal/video?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374',
|
||||
'md5': 'e02a05bfaf0d9615ef07ae3a10f4faf4',
|
||||
'info_dict': {
|
||||
'id': '4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
'id': '4dbb36ec4e0526d58f9f2dc8f0ecf374',
|
||||
'ext': 'mp4',
|
||||
'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 0,
|
||||
'duration': 30,
|
||||
'title': 'Русский Вудсток - главный рок-фест в истории СССР / вДудь',
|
||||
'description': 'md5:7d6b8d4bc4a3b9a56499916c1ea5b5fa',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1549972939,
|
||||
'duration': 5575,
|
||||
'age_limit': 18,
|
||||
'upload_date': '20190212',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
|
||||
'url': 'https://yandex.ru/portal/efir?stream_id=4dbb262b4fe5cf15a215de4f34eee34d&from=morda',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
|
||||
@@ -52,53 +57,88 @@ class YandexVideoIE(InfoExtractor):
|
||||
# DASH with DRM
|
||||
'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://yandex.ru/efir?stream_active=watching&stream_id=v7a2dZ-v5mSI&from_block=efir_newtab',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Yandex video by stream ID.

    The GraphQL endpoint is tried first (non-fatally); when it yields
    nothing or reports an error, the v23 player JSON endpoint is used
    instead.  Every entry of ``streams`` plus ``content_url`` is turned
    into formats; ``ismc`` manifests are skipped.

    Returns an info dict; raises if neither ``title`` nor
    ``computed_title`` is present in the content.
    """
    video_id = self._match_id(url)

    # Format as text first and encode afterwards: %-formatting of bytes
    # objects does not exist on Python 3.0-3.4.
    graphql_query = ('''{
  player(content_id: "%s") {
    computed_title
    content_url
    description
    dislikes
    duration
    likes
    program_title
    release_date
    release_date_ut
    release_year
    restriction_age
    season
    start_time
    streams
    thumbnail
    title
    views_count
  }
}''' % video_id).encode()
    player = try_get((self._download_json(
        'https://frontend.vh.yandex.ru/graphql', video_id,
        data=graphql_query, fatal=False)), lambda x: x['player']['content'])
    if not player or player.get('error'):
        player = self._download_json(
            'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id,
            video_id, query={
                'stream_options': 'hires',
                'disable_trackings': 1,
            })
    content = player['content']

    title = content.get('title') or content['computed_title']

    formats = []
    streams = content.get('streams') or []
    # Treat the top-level content_url as one more candidate stream
    streams.append({'url': content.get('content_url')})
    for stream in streams:
        content_url = url_or_none(stream.get('url'))
        if not content_url:
            continue
        ext = determine_ext(content_url)
        if ext == 'ismc':
            continue
        elif ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                content_url, video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))
        elif ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                content_url, video_id, mpd_id='dash', fatal=False))
        else:
            formats.append({'url': content_url})

    self._sort_formats(formats)

    # First usable value among the three date fields wins
    timestamp = (int_or_none(content.get('release_date'))
                 or int_or_none(content.get('release_date_ut'))
                 or int_or_none(content.get('start_time')))
    season = content.get('season') or {}

    return {
        'id': video_id,
        'title': title,
        'description': content.get('description'),
        'thumbnail': content.get('thumbnail'),
        'timestamp': timestamp,
        'duration': int_or_none(content.get('duration')),
        'series': content.get('program_title'),
        'age_limit': int_or_none(content.get('restriction_age')),
        'view_count': int_or_none(content.get('views_count')),
        'like_count': int_or_none(content.get('likes')),
        'dislike_count': int_or_none(content.get('dislikes')),
        'season_number': int_or_none(season.get('season_number')),
        'season_id': season.get('id'),
        'release_year': int_or_none(content.get('release_year')),
        'formats': formats,
    }
|
||||
|
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
|
||||
from ..swfinterp import SWFInterpreter
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_unquote_plus,
|
||||
@@ -279,6 +280,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||
|
||||
def _call_api(self, ep, query, video_id):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
@@ -296,10 +298,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_yt_initial_data(self, video_id, webpage):
    """Locate the ``ytInitialData`` JSON object in *webpage* and parse it.

    Two patterns are tried in order: the object followed by a known
    boundary (``_YT_INITIAL_BOUNDARY_RE``), then the bare
    ``_YT_INITIAL_DATA_RE``.  The boundary variant goes first because the
    bare pattern stops at the first ``};`` and the JSON itself may
    contain that sequence.
    """
    return self._parse_json(
        self._search_regex(
            (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
             self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
        video_id)
|
||||
|
||||
def _extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to ``ytcfg.set(...)`` in *webpage*.

    An empty JSON object is parsed when the call is not found, and JSON
    parsing is non-fatal.
    """
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False)
|
||||
|
||||
|
||||
class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
IE_DESC = 'YouTube.com'
|
||||
@@ -315,7 +323,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
|
||||
(?:(?:www|dev)\.)?invidio\.us/|
|
||||
(?:(?:www|no)\.)?invidiou\.sh/|
|
||||
(?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
|
||||
(?:(?:www|fi)\.)?invidious\.snopyta\.org/|
|
||||
(?:www\.)?invidious\.kabi\.tk/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.mastodon\.host/|
|
||||
@@ -1096,6 +1104,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# another example of '};' in ytInitialData
|
||||
'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@@ -1316,17 +1333,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return self._parse_json(
|
||||
uppercase_escape(config), video_id, fatal=False)
|
||||
|
||||
def _get_automatic_captions(self, video_id, webpage):
|
||||
def _get_automatic_captions(self, video_id, player_response, player_config):
|
||||
"""We need the webpage for getting the captions url, pass it as an
|
||||
argument to speed up the process."""
|
||||
self.to_screen('%s: Looking for automatic captions' % video_id)
|
||||
player_config = self._get_ytplayer_config(video_id, webpage)
|
||||
err_msg = 'Couldn\'t find automatic captions for %s' % video_id
|
||||
if not player_config:
|
||||
if not (player_response or player_config):
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
try:
|
||||
args = player_config['args']
|
||||
args = player_config.get('args') if player_config else {}
|
||||
caption_url = args.get('ttsurl')
|
||||
if caption_url:
|
||||
timestamp = args['timestamp']
|
||||
@@ -1385,19 +1401,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return captions
|
||||
|
||||
# New captions format as of 22.06.2017
|
||||
player_response = args.get('player_response')
|
||||
if player_response and isinstance(player_response, compat_str):
|
||||
player_response = self._parse_json(
|
||||
player_response, video_id, fatal=False)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
if player_response:
|
||||
renderer = player_response['captions']['playerCaptionsTracklistRenderer']
|
||||
base_url = renderer['captionTracks'][0]['baseUrl']
|
||||
sub_lang_list = []
|
||||
for lang in renderer['translationLanguages']:
|
||||
lang_code = lang.get('languageCode')
|
||||
if lang_code:
|
||||
sub_lang_list.append(lang_code)
|
||||
return make_captions(base_url, sub_lang_list)
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
@@ -1646,6 +1658,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
ytplayer_config = None
|
||||
|
||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||
age_gate = True
|
||||
@@ -1699,7 +1712,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not video_info and not player_response:
|
||||
player_response = extract_player_response(
|
||||
self._search_regex(
|
||||
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||
(r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
|
||||
'initial player response', default='{}'),
|
||||
video_id)
|
||||
@@ -2270,7 +2283,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
|
||||
|
||||
video_duration = try_get(
|
||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||
@@ -2283,16 +2296,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# annotations
|
||||
video_annotations = None
|
||||
if self._downloader.params.get('writeannotations', False):
|
||||
xsrf_token = self._search_regex(
|
||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
|
||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||
xsrf_token = None
|
||||
ytcfg = self._extract_ytcfg(video_id, video_webpage)
|
||||
if ytcfg:
|
||||
xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
|
||||
if not xsrf_token:
|
||||
xsrf_token = self._search_regex(
|
||||
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
|
||||
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
|
||||
invideo_url = try_get(
|
||||
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
|
||||
if xsrf_token and invideo_url:
|
||||
xsrf_field_name = self._search_regex(
|
||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||
video_webpage, 'xsrf field name',
|
||||
group='xsrf_field_name', default='session_token')
|
||||
xsrf_field_name = None
|
||||
if ytcfg:
|
||||
xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
|
||||
if not xsrf_field_name:
|
||||
xsrf_field_name = self._search_regex(
|
||||
r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
|
||||
video_webpage, 'xsrf field name',
|
||||
group='xsrf_field_name', default='session_token')
|
||||
video_annotations = self._download_webpage(
|
||||
self._proto_relative_url(invideo_url),
|
||||
video_id, note='Downloading annotations',
|
||||
@@ -2431,7 +2453,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
)/
|
||||
(?:
|
||||
(?:channel|c|user|feed)/|
|
||||
(?:playlist|watch)\?.*?\blist=
|
||||
(?:playlist|watch)\?.*?\blist=|
|
||||
(?!(?:watch|embed|v|e)\b)
|
||||
)
|
||||
(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
@@ -2700,13 +2723,27 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
# inline playlist with not always working continuations
|
||||
'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
|
||||
'only_matching': True,
|
||||
}
|
||||
# TODO
|
||||
# {
|
||||
# 'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
# 'only_matching': True,
|
||||
# }
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/course',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/zsecurity',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.youtube.com/NASAgovVideo/videos',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise defer to the default matching."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
|
||||
|
||||
def _extract_channel_id(self, webpage):
|
||||
channel_id = self._html_search_meta(
|
||||
@@ -2988,10 +3025,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
for page_num in itertools.count(1):
|
||||
if not continuation:
|
||||
break
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/browse_ajax', None,
|
||||
'Downloading page %d' % page_num,
|
||||
headers=headers, query=continuation, fatal=False)
|
||||
count = 0
|
||||
retries = 3
|
||||
while count <= retries:
|
||||
try:
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# that is usually worked around with a retry
|
||||
browse = self._download_json(
|
||||
'https://www.youtube.com/browse_ajax', None,
|
||||
'Downloading page %d%s'
|
||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||
headers=headers, query=continuation)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||
count += 1
|
||||
if count <= retries:
|
||||
continue
|
||||
raise
|
||||
if not browse:
|
||||
break
|
||||
response = try_get(browse, lambda x: x[1]['response'], dict)
|
||||
@@ -3130,10 +3181,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
playlist_title=title)
|
||||
|
||||
def _extract_identity_token(self, webpage, item_id):
|
||||
ytcfg = self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), item_id, fatal=False)
|
||||
ytcfg = self._extract_ytcfg(item_id, webpage)
|
||||
if ytcfg:
|
||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||
if token:
|
||||
|
@@ -85,7 +85,13 @@ class ZypeIE(InfoExtractor):
|
||||
else:
|
||||
m3u8_url = self._search_regex(
|
||||
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
|
||||
body, 'm3u8 url', group='url')
|
||||
body, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
source = self._parse_json(self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
|
||||
'source'), video_id, js_to_json)
|
||||
if source.get('integration') == 'verizon-media':
|
||||
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
text_tracks = self._search_regex(
|
||||
|
@@ -3640,7 +3640,7 @@ def url_or_none(url):
|
||||
if not url or not isinstance(url, compat_str):
|
||||
return None
|
||||
url = url.strip()
|
||||
return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
|
||||
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||
|
||||
|
||||
def parse_duration(s):
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2020.12.14'
|
||||
__version__ = '2020.12.31'
|
||||
|
Reference in New Issue
Block a user