mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-14 12:18:37 +09:00
Compare commits
133 Commits
97c5be383c
...
2020.12.14
Author | SHA1 | Date | |
---|---|---|---|
![]() |
755f186e21 | ||
![]() |
2240a1dc4d | ||
![]() |
03d3af9768 | ||
![]() |
5ce9527e16 | ||
![]() |
c527f5ada0 | ||
![]() |
ace52668f0 | ||
![]() |
9c33eb027e | ||
![]() |
679b711395 | ||
![]() |
1727541315 | ||
![]() |
45b0a0d11b | ||
![]() |
e665fcd4da | ||
![]() |
aae737d4af | ||
![]() |
92a6de861e | ||
![]() |
5ff881aee6 | ||
![]() |
eae19a4473 | ||
![]() |
f70c263ce5 | ||
![]() |
92d135921f | ||
![]() |
d8008dee4f | ||
![]() |
bb38a12157 | ||
![]() |
bcc8ef0a5a | ||
![]() |
3cb1a5dc73 | ||
![]() |
ed40c02c9b | ||
![]() |
b5fdceb4d1 | ||
![]() |
1e6f7f3b45 | ||
![]() |
469ff02f4e | ||
![]() |
56f1c5ad38 | ||
![]() |
3a6b8f4edf | ||
![]() |
3ae02d4a62 | ||
![]() |
59fea63dc2 | ||
![]() |
22933e02d4 | ||
![]() |
98a62d7cbd | ||
![]() |
d6c5fdef34 | ||
![]() |
4b9051cf39 | ||
![]() |
00f5068908 | ||
![]() |
432c6b0f48 | ||
![]() |
ad5e5788ff | ||
![]() |
dc65041c22 | ||
![]() |
9f88b07945 | ||
![]() |
225646c4ca | ||
![]() |
5c6e9f0b6c | ||
![]() |
6c370bc149 | ||
![]() |
37fd242342 | ||
![]() |
aee1f87168 | ||
![]() |
b69bb1ed11 | ||
![]() |
772cefef8c | ||
![]() |
842654b6d0 | ||
![]() |
df5e50954b | ||
![]() |
a4a2fa8754 | ||
![]() |
9da0504a09 | ||
![]() |
470cf496f5 | ||
![]() |
e029da9add | ||
![]() |
e00b8f60d4 | ||
![]() |
644c3ef886 | ||
![]() |
9d8d0f8b4a | ||
![]() |
5a1fbbf8b7 | ||
![]() |
e2bdf8bf4f | ||
![]() |
c368dc98e0 | ||
![]() |
e7eff914cd | ||
![]() |
07333d0062 | ||
![]() |
5bd7ad2e81 | ||
![]() |
3ded751985 | ||
![]() |
6956db3606 | ||
![]() |
17b01228f8 | ||
![]() |
4f1ecca58d | ||
![]() |
2717036489 | ||
![]() |
d9482c0083 | ||
![]() |
791b743765 | ||
![]() |
fa604d9083 | ||
![]() |
2bf0634d16 | ||
![]() |
dccf4932e1 | ||
![]() |
91dd25fe1e | ||
![]() |
06bf2ac20f | ||
![]() |
6ad0d8781e | ||
![]() |
f2c704e112 | ||
![]() |
5e822c2526 | ||
![]() |
cc017e07ca | ||
![]() |
082da36416 | ||
![]() |
6bf95b15ee | ||
![]() |
4c93b2fd15 | ||
![]() |
1b26bfd425 | ||
![]() |
13ec444a98 | ||
![]() |
51579d87e4 | ||
![]() |
e147619669 | ||
![]() |
e7f93fbd85 | ||
![]() |
58f7ada235 | ||
![]() |
c67b33888f | ||
![]() |
e8c0af04b7 | ||
![]() |
5d769860c3 | ||
![]() |
02b04785ee | ||
![]() |
41c92b8d02 | ||
![]() |
fe0c28f956 | ||
![]() |
957c65b9ea | ||
![]() |
5e95e18ce9 | ||
![]() |
e91df0c550 | ||
![]() |
c5636e9bca | ||
![]() |
2e47264235 | ||
![]() |
1c78cb118c | ||
![]() |
beab2f88c9 | ||
![]() |
0025447369 | ||
![]() |
da4304609d | ||
![]() |
ea89680aea | ||
![]() |
664dd8ba85 | ||
![]() |
64554c12e1 | ||
![]() |
4ded9c0f00 | ||
![]() |
c0820dd52a | ||
![]() |
2bb70750a9 | ||
![]() |
09d923f2eb | ||
![]() |
37d979ad33 | ||
![]() |
95ac4de229 | ||
![]() |
d3e142b3fa | ||
![]() |
132aece1ed | ||
![]() |
3e4e338133 | ||
![]() |
be19ae11fd | ||
![]() |
59d63d8d4a | ||
![]() |
cfeba5d17f | ||
![]() |
6da0e5e7a2 | ||
![]() |
d6ce649f15 | ||
![]() |
b449b73dcc | ||
![]() |
16c822e91e | ||
![]() |
4318170779 | ||
![]() |
fb626c0586 | ||
![]() |
717d1d2d5a | ||
![]() |
9585b376db | ||
![]() |
f04cfe24e0 | ||
![]() |
20c50c6556 | ||
![]() |
f9f9699f2f | ||
![]() |
a3cf22e590 | ||
![]() |
99de2f38d3 | ||
![]() |
9fe50837c3 | ||
![]() |
4dc545553f | ||
![]() |
686e898fde | ||
![]() |
3a78198a96 | ||
![]() |
836c810716 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.11.24
|
||||
[debug] youtube-dl version 2020.12.14
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.11.24
|
||||
[debug] youtube-dl version 2020.12.14
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.12.14. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.12.14**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -7,8 +7,10 @@
|
||||
---
|
||||
|
||||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections
|
||||
- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
- [ ] Read [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site)
|
||||
- [ ] Read [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) and adjusted the code to meet them
|
||||
- [ ] Covered the code with tests (note that PRs without tests will be REJECTED)
|
||||
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
|
||||
|
||||
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
|
||||
|
26
.travis.yml
26
.travis.yml
@@ -12,29 +12,29 @@ python:
|
||||
dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
# - YTDL_TEST_SET=download
|
||||
jobs:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
# - python: 3.7
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- python: 3.8
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
# - python: 3.8
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
# - python: 3.8-dev
|
||||
# dist: xenial
|
||||
# env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||
- name: flake8
|
||||
python: 3.8
|
||||
dist: xenial
|
||||
@@ -42,9 +42,9 @@ jobs:
|
||||
script: flake8 .
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
- env: YTDL_TEST_SET=download
|
||||
# - env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
# - env: JYTHON=true; YTDL_TEST_SET=download
|
||||
before_install:
|
||||
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
|
||||
script: ./devscripts/run_tests.sh
|
||||
|
183
ChangeLog
183
ChangeLog
@@ -1,3 +1,186 @@
|
||||
version 2020.12.14
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve JSON-LD interaction statistic extraction (#23306)
|
||||
* [downloader/hls] Delegate manifests with media initialization to ffmpeg
|
||||
+ [extractor/common] Document duration meta field for playlists
|
||||
|
||||
Extractors
|
||||
* [mdr] Bypass geo restriction
|
||||
* [mdr] Improve extraction (#24346, #26873)
|
||||
* [yandexmusic:album] Improve album title extraction (#27418)
|
||||
* [eporner] Fix view count extraction and make optional (#23306)
|
||||
+ [eporner] Extend URL regular expression
|
||||
* [eporner] Fix hash extraction and extend _VALID_URL (#27396)
|
||||
* [slideslive] Use m3u8 entry protocol for m3u8 formats (#27400)
|
||||
* [twitcasting] Fix format extraction and improve info extraction (#24868)
|
||||
* [linuxacademy] Fix authentication and extraction (#21129, #26223, #27402)
|
||||
* [itv] Clean description from HTML tags (#27399)
|
||||
* [vlive] Sort live formats (#27404)
|
||||
* [hotstar] Fix and improve extraction
|
||||
* Fix format extraction (#26690)
|
||||
+ Extract thumbnail URL (#16079, #20412)
|
||||
+ Add support for country specific playlist URLs (#23496)
|
||||
* Select the last id in video URL (#26412)
|
||||
+ [youtube] Add some invidious instances (#27373)
|
||||
|
||||
|
||||
version 2020.12.12
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Improve thumbnail filename deducing (#26010, #27244)
|
||||
|
||||
Extractors
|
||||
+ [ruutu] Extract more metadata
|
||||
+ [ruutu] Detect non-free videos (#21154)
|
||||
* [ruutu] Authenticate format URLs (#21031, #26782)
|
||||
+ [ruutu] Add support for static.nelonenmedia.fi (#25412)
|
||||
+ [ruutu] Extend URL regular expression (#24839)
|
||||
+ [facebook] Add support for archived live video URLs (#15859)
|
||||
* [wdr] Improve overall extraction
|
||||
+ [wdr] Extend subtitles extraction (#22672, #22723)
|
||||
+ [facebook] Add support for videos attached to Relay based story pages
|
||||
(#10795)
|
||||
+ [wdr:page] Add support for kinder.wdr.de (#27350)
|
||||
+ [facebook] Add another regular expression for handleServerJS
|
||||
* [facebook] Fix embed page extraction
|
||||
+ [facebook] Add support for Relay post pages (#26935)
|
||||
+ [facebook] Add support for watch videos (#22795, #27062)
|
||||
+ [facebook] Add support for group posts with multiple videos (#19131)
|
||||
* [itv] Fix series metadata extraction (#26897)
|
||||
- [itv] Remove old extraction method (#23177)
|
||||
* [facebook] Redirect mobile URLs to desktop URLs (#24831, #25624)
|
||||
+ [facebook] Add support for Relay based pages (#26823)
|
||||
* [facebook] Try to reduce unnecessary tahoe requests
|
||||
- [facebook] Remove hardcoded Chrome User-Agent (#18974, #25411, #26958,
|
||||
#27329)
|
||||
- [smotri] Remove extractor (#27358)
|
||||
- [beampro] Remove extractor (#17290, #22871, #23020, #23061, #26099)
|
||||
|
||||
|
||||
version 2020.12.09
|
||||
|
||||
Core
|
||||
* [extractor/common] Fix inline HTML5 media tags processing (#27345)
|
||||
|
||||
Extractors
|
||||
* [youtube:tab] Improve identity token extraction (#27197)
|
||||
* [youtube:tab] Make click tracking params on continuation optional
|
||||
* [youtube:tab] Delegate inline playlists to tab-based playlists (#27298)
|
||||
+ [tubitv] Extract release year (#27317)
|
||||
* [amcnetworks] Fix free content extraction (#20354)
|
||||
+ [lbry:channel] Add support for channels (#25584)
|
||||
+ [lbry] Add support for short and embed URLs
|
||||
* [lbry] Fix channel metadata extraction
|
||||
+ [telequebec] Add support for video.telequebec.tv (#27339)
|
||||
* [telequebec] Fix extraction (#25733, #26883)
|
||||
+ [youtube:tab] Capture and output alerts (#27340)
|
||||
* [tvplay:home] Fix extraction (#21153)
|
||||
* [americastestkitchen] Fix extraction and add support
|
||||
for Cook's Country and Cook's Illustrated (#17234, #27322)
|
||||
+ [slideslive] Add support for yoda service videos and extract subtitles
|
||||
(#27323)
|
||||
|
||||
|
||||
version 2020.12.07
|
||||
|
||||
Core
|
||||
* [extractor/common] Extract timestamp from Last-Modified header
|
||||
+ [extractor/common] Add support for dl8-* media tags (#27283)
|
||||
* [extractor/common] Fix media type extraction for HTML5 media tags
|
||||
in start/end form
|
||||
|
||||
Extractors
|
||||
* [aenetworks] Fix extraction (#23363, #23390, #26795, #26985)
|
||||
* Fix Fastly format extraction
|
||||
+ Add support for play and watch subdomains
|
||||
+ Extract series metadata
|
||||
* [youtube] Improve youtu.be extraction in non-existing playlists (#27324)
|
||||
+ [generic] Extract RSS video description, timestamp and itunes metadata
|
||||
(#27177)
|
||||
* [nrk] Reduce the number of instalments and episodes requests
|
||||
* [nrk] Improve extraction
|
||||
* Improve format extraction for old akamai formats
|
||||
+ Add is_live value to entry info dict
|
||||
* Request instalments only when available
|
||||
* Fix skole extraction
|
||||
+ [peertube] Extract fps
|
||||
+ [peertube] Recognize audio-only formats (#27295)
|
||||
|
||||
|
||||
version 2020.12.05
|
||||
|
||||
Core
|
||||
* [extractor/common] Improve Akamai HTTP format extraction
|
||||
* Allow m3u8 manifest without an additional audio format
|
||||
* Fix extraction for qualities starting with a number
|
||||
|
||||
Extractors
|
||||
* [teachable:course] Improve extraction (#24507, #27286)
|
||||
* [nrk] Improve error extraction
|
||||
* [nrktv:series] Improve extraction (#21926)
|
||||
* [nrktv:season] Improve extraction
|
||||
* [nrk] Improve format extraction and geo-restriction detection (#24221)
|
||||
* [pornhub] Handle HTTP errors gracefully (#26414)
|
||||
* [nrktv] Relax URL regular expression (#27299, #26185)
|
||||
+ [zdf] Extract webm formats (#26659)
|
||||
+ [gamespot] Extract DASH and HTTP formats
|
||||
+ [tver] Add support for tver.jp (#26662, #27284)
|
||||
+ [pornhub] Add support for pornhub.org (#27276)
|
||||
|
||||
|
||||
version 2020.12.02
|
||||
|
||||
Extractors
|
||||
+ [tva] Add support for qub.ca (#27235)
|
||||
+ [toggle] Detect DRM protected videos (#16479, #20805)
|
||||
+ [toggle] Add support for new MeWatch URLs (#27256)
|
||||
* [youtube:tab] Extract channels only from channels tab (#27266)
|
||||
+ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030,
|
||||
#18806, #23148, #24461, #26171, #26800, #27263)
|
||||
* [cspan] Pass Referer header with format's video URL (#26032, #25729)
|
||||
* [youtube] Improve age-gated videos extraction (#27259)
|
||||
+ [mediaset] Add support for movie URLs (#27240)
|
||||
* [yandexmusic] Refactor
|
||||
+ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284)
|
||||
* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762)
|
||||
|
||||
|
||||
version 2020.11.29
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Write static debug to stderr and respect quiet for dynamic debug
|
||||
(#14579, #22593)
|
||||
|
||||
Extractors
|
||||
* [drtv] Extend URL regular expression (#27243)
|
||||
* [tiktok] Fix extraction (#20809, #22838, #22850, #25987, #26281, #26411,
|
||||
#26639, #26776, #27237)
|
||||
+ [ina] Add support for mobile URLs (#27229)
|
||||
* [pornhub] Fix like and dislike count extraction (#27227, #27234)
|
||||
* [youtube] Improve yt initial player response extraction (#27216)
|
||||
* [videa] Fix extraction (#25650, #25973, #26301)
|
||||
|
||||
|
||||
version 2020.11.26
|
||||
|
||||
Core
|
||||
* [downloader/fragment] Set final file's mtime according to last fragment's
|
||||
Last-Modified header (#11718, #18384, #27138)
|
||||
|
||||
Extractors
|
||||
+ [spreaker] Add support for spreaker.com (#13480, #13877)
|
||||
* [vlive] Improve extraction for geo-restricted videos
|
||||
+ [vlive] Add support for post URLs (#27122, #27123)
|
||||
* [viki] Fix video API request (#27184)
|
||||
* [bbc] Fix BBC Three clip extraction
|
||||
* [bbc] Fix BBC News videos extraction
|
||||
+ [medaltv] Add support for medal.tv (#27149)
|
||||
* [youtube] Improve music metadata and license extraction (#26013)
|
||||
* [nrk] Fix extraction
|
||||
* [cda] Fix extraction (#17803, #24458, #24518, #26381)
|
||||
|
||||
|
||||
version 2020.11.24
|
||||
|
||||
Core
|
||||
|
@@ -1,4 +1,4 @@
|
||||
[Build Status](https://travis-ci.org/ytdl-org/youtube-dl)
|
||||
[Build Status](https://travis-ci.com/ytdl-org/youtube-dl)
|
||||
|
||||
youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
@@ -371,7 +371,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor authentication code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri, youku)
|
||||
--video-password PASSWORD Video password (vimeo, youku)
|
||||
|
||||
## Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass multiple-system operator (TV
|
||||
|
@@ -35,6 +35,8 @@
|
||||
- **adobetv:video**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||
- **aenetworks:collection**
|
||||
- **aenetworks:show**
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
@@ -308,6 +310,7 @@
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **FujiTVFODPlus7**
|
||||
- **Funimation**
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
@@ -421,7 +424,8 @@
|
||||
- **la7.it**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **lbry.tv**
|
||||
- **lbry**
|
||||
- **lbry:channel**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
@@ -471,6 +475,7 @@
|
||||
- **massengeschmack.tv**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
- **MedalTV**
|
||||
- **media.ccc.de**
|
||||
- **media.ccc.de:lists**
|
||||
- **Medialaan**
|
||||
@@ -485,6 +490,7 @@
|
||||
- **META**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **mewatch**
|
||||
- **Mgoon**
|
||||
- **MGTV**: 芒果TV
|
||||
- **MiaoPai**
|
||||
@@ -495,8 +501,6 @@
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
- **Mixer:live**
|
||||
- **Mixer:vod**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MNetTV**
|
||||
@@ -708,6 +712,7 @@
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**
|
||||
- **Qub**
|
||||
- **Quickline**
|
||||
- **QuicklineLive**
|
||||
- **R7**
|
||||
@@ -810,10 +815,6 @@
|
||||
- **Slideshare**
|
||||
- **SlidesLive**
|
||||
- **Slutload**
|
||||
- **smotri**: Smotri.com
|
||||
- **smotri:broadcast**: Smotri.com broadcasts
|
||||
- **smotri:community**: Smotri.com community videos
|
||||
- **smotri:user**: Smotri.com user videos
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **SonyLIV**
|
||||
@@ -839,6 +840,10 @@
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **Spreaker**
|
||||
- **SpreakerPage**
|
||||
- **SpreakerShow**
|
||||
- **SpreakerShowPage**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
@@ -890,6 +895,7 @@
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleQuebecSquat**
|
||||
- **TeleQuebecVideo**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TennisTV**
|
||||
@@ -907,7 +913,7 @@
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **TikTok**
|
||||
- **TikTokUser**
|
||||
- **TikTokUser** (Currently broken)
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
@@ -946,6 +952,7 @@
|
||||
- **TVANouvellesArticle**
|
||||
- **TVC**
|
||||
- **TVCArticle**
|
||||
- **TVer**
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVN24**
|
||||
@@ -1055,6 +1062,7 @@
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **vlive:post**
|
||||
- **Vodlocker**
|
||||
- **VODPl**
|
||||
- **VODPlatform**
|
||||
@@ -1126,6 +1134,8 @@
|
||||
- **yahoo:japannews**: Yahoo! Japan News
|
||||
- **YandexDisk**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
|
||||
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
- **yandexmusic:track**: Яндекс.Музыка - Трек
|
||||
- **YandexVideo**
|
||||
@@ -1152,6 +1162,7 @@
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:tab**: YouTube.com tab
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **YoutubeYtBe**
|
||||
- **YoutubeYtUser**
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
|
@@ -98,6 +98,55 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
|
||||
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
|
||||
|
||||
def test_search_json_ld_realworld(self):
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/23306
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._search_json_ld(r'''<script type="application/ld+json">
|
||||
{
|
||||
"@context": "http://schema.org/",
|
||||
"@type": "VideoObject",
|
||||
"name": "1 On 1 With Kleio",
|
||||
"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
|
||||
"duration": "PT0H12M23S",
|
||||
"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
|
||||
"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
|
||||
"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
|
||||
"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
|
||||
"width": "1920",
|
||||
"height": "1080",
|
||||
"encodingFormat": "mp4",
|
||||
"bitrate": "6617kbps",
|
||||
"isFamilyFriendly": "False",
|
||||
"description": "Kleio Valentien",
|
||||
"uploadDate": "2015-12-05T21:24:35+01:00",
|
||||
"interactionStatistic": {
|
||||
"@type": "InteractionCounter",
|
||||
"interactionType": { "@type": "http://schema.org/WatchAction" },
|
||||
"userInteractionCount": 1120958
|
||||
}, "aggregateRating": {
|
||||
"@type": "AggregateRating",
|
||||
"ratingValue": "88",
|
||||
"ratingCount": "630",
|
||||
"bestRating": "100",
|
||||
"worstRating": "0"
|
||||
}, "actor": [{
|
||||
"@type": "Person",
|
||||
"name": "Kleio Valentien",
|
||||
"url": "https://www.eporner.com/pornstar/kleio-valentien/"
|
||||
}]}
|
||||
</script>''', None),
|
||||
{
|
||||
'title': '1 On 1 With Kleio',
|
||||
'description': 'Kleio Valentien',
|
||||
'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
|
||||
'timestamp': 1449347075,
|
||||
'duration': 743.0,
|
||||
'view_count': 1120958,
|
||||
'width': 1920,
|
||||
'height': 1080,
|
||||
})
|
||||
|
||||
def test_download_json(self):
|
||||
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
|
||||
self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
|
||||
@@ -108,6 +157,18 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
|
||||
|
||||
def test_parse_html5_media_entries(self):
|
||||
# inline video tag
|
||||
expect_dict(
|
||||
self,
|
||||
self.ie._parse_html5_media_entries(
|
||||
'https://127.0.0.1/video.html',
|
||||
r'<html><video src="/vid.mp4" /></html>', None)[0],
|
||||
{
|
||||
'formats': [{
|
||||
'url': 'https://127.0.0.1/vid.mp4',
|
||||
}],
|
||||
})
|
||||
|
||||
# from https://www.r18.com/
|
||||
# with kbps in label
|
||||
expect_dict(
|
||||
|
@@ -1610,7 +1610,7 @@ class YoutubeDL(object):
|
||||
if req_format is None:
|
||||
req_format = self._default_format_spec(info_dict, download=download)
|
||||
if self.params.get('verbose'):
|
||||
self.to_stdout('[debug] Default format spec: %s' % req_format)
|
||||
self._write_string('[debug] Default format spec: %s\n' % req_format)
|
||||
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
|
||||
@@ -1871,7 +1871,7 @@ class YoutubeDL(object):
|
||||
for ph in self._progress_hooks:
|
||||
fd.add_progress_hook(ph)
|
||||
if self.params.get('verbose'):
|
||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
||||
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||
return fd.download(name, info)
|
||||
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
@@ -2410,7 +2410,7 @@ class YoutubeDL(object):
|
||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
||||
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
||||
t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
||||
t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
|
||||
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
||||
|
@@ -42,11 +42,13 @@ class HlsFD(FragmentFD):
|
||||
# no segments will definitely be appended to the end of the playlist.
|
||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||
# # event media playlists [4]
|
||||
r'#EXT-X-MAP:', # media initialization [5]
|
||||
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
|
||||
)
|
||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||
|
@@ -5,20 +5,30 @@ import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class AENetworksBaseIE(ThePlatformIE):
|
||||
_BASE_URL_REGEX = r'''(?x)https?://
|
||||
(?:(?:www|play|watch)\.)?
|
||||
(?P<domain>
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/'''
|
||||
_THEPLATFORM_KEY = 'crazyjava'
|
||||
_THEPLATFORM_SECRET = 's3cr3t'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
'mylifetime.com': ('LIFETIME', 'lifetime'),
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
query = {'mbr': 'true'}
|
||||
@@ -31,7 +41,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
'assetTypes': 'high_video_s3'
|
||||
}, {
|
||||
'assetTypes': 'high_video_s3',
|
||||
'switch': 'hls_ingest_fastly'
|
||||
'switch': 'hls_high_fastly',
|
||||
}]
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@@ -61,20 +71,13 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
|
||||
shows/[^/]+/season-\d+/episode-\d+|
|
||||
(?:
|
||||
(?:movie|special)s/[^/]+|
|
||||
(?:shows/[^/]+/)?videos
|
||||
)/[^/?#&]+
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
'info_dict': {
|
||||
@@ -91,22 +94,23 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
||||
'info_dict': {
|
||||
'id': '71889446852',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
||||
'info_dict': {
|
||||
'id': 'SERIES4317',
|
||||
'title': 'Atlanta Plastic',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'only_matching': True
|
||||
'info_dict': {
|
||||
'id': '600587331957',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inlawful Entry',
|
||||
'description': 'md5:57c12115a2b384d883fe64ca50529e08',
|
||||
'timestamp': 1452634428,
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
@@ -117,80 +121,152 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
||||
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
'aetv.com': 'AETV',
|
||||
'mylifetime.com': 'LIFETIME',
|
||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
||||
'fyi.tv': 'FYI',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
if url_parts_len == 1:
|
||||
entries = []
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
else:
|
||||
# single season
|
||||
url_parts_len = 2
|
||||
if url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
episode_url = compat_urlparse.urljoin(
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
|
||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
||||
media_url = self._search_regex(
|
||||
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
||||
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
||||
webpage, 'video url', group='url')
|
||||
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
canonical, query={'filter[canonical]': '/' + canonical})['results'][0]
|
||||
title = result['title']
|
||||
video_id = result['id']
|
||||
media_url = result['publicUrl']
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
auth = None
|
||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
'series': result.get('seriesName'),
|
||||
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
def _call_api(self, resource, slug, brand, fields):
|
||||
return self._download_json(
|
||||
'https://yoga.appsvcs.aetnd.com/graphql',
|
||||
slug, query={'brand': brand}, data=urlencode_postdata({
|
||||
'query': '''{
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields),
|
||||
}))['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = re.match(self._VALID_URL, url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = 'http://watch.%s' % domain
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
doc = self._get_doc(item)
|
||||
canonical = doc.get('canonical')
|
||||
if not canonical:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
|
||||
|
||||
description = None
|
||||
if self._PLAYLIST_DESCRIPTION_KEY:
|
||||
description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist.get('id'),
|
||||
playlist.get(self._PLAYLIST_TITLE_KEY), description)
|
||||
|
||||
|
||||
class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||
IE_NAME = 'aenetworks:collection'
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
|
||||
'info_dict': {
|
||||
'id': '282',
|
||||
'title': 'America The Story of Us',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||
'only_matching': True
|
||||
}]
|
||||
_RESOURCE = 'list'
|
||||
_ITEMS_KEY = 'items'
|
||||
_PLAYLIST_TITLE_KEY = 'display_title'
|
||||
_PLAYLIST_DESCRIPTION_KEY = None
|
||||
_FIELDS = '''id
|
||||
display_title
|
||||
items {
|
||||
... on ListVideoItem {
|
||||
doc {
|
||||
canonical
|
||||
id
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _get_doc(self, item):
|
||||
return item.get('doc') or {}
|
||||
|
||||
|
||||
class AENetworksShowIE(AENetworksListBaseIE):
|
||||
IE_NAME = 'aenetworks:show'
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||
'info_dict': {
|
||||
'id': 'SH012427480000',
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
'playlist_mincount': 168,
|
||||
}]
|
||||
_RESOURCE = 'series'
|
||||
_ITEMS_KEY = 'episodes'
|
||||
_PLAYLIST_TITLE_KEY = 'title'
|
||||
_PLAYLIST_DESCRIPTION_KEY = 'description'
|
||||
_FIELDS = '''description
|
||||
id
|
||||
title
|
||||
episodes {
|
||||
canonical
|
||||
id
|
||||
}'''
|
||||
|
||||
def _get_doc(self, item):
|
||||
return item
|
||||
|
||||
|
||||
class HistoryTopicIE(AENetworksBaseIE):
|
||||
IE_NAME = 'history:topic'
|
||||
IE_DESC = 'History.com Topic'
|
||||
@@ -204,6 +280,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
'uploader': 'AENE-NEW',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -212,36 +289,8 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'add_ie': ['ThePlatform'],
|
||||
}]
|
||||
|
||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(
|
||||
update_url_query(theplatform_url, query),
|
||||
{
|
||||
'sig': {
|
||||
'key': self._THEPLATFORM_KEY,
|
||||
'secret': self._THEPLATFORM_SECRET,
|
||||
},
|
||||
'force_smil_url': True
|
||||
}),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/history/videos',
|
||||
video_id, query={'filter[id]': video_id})['results'][0]
|
||||
title = result['title']
|
||||
info = self._extract_aen_smil(result['publicUrl'], video_id)
|
||||
info.update({
|
||||
'title': title,
|
||||
'description': result.get('description'),
|
||||
'duration': int_or_none(result.get('duration')),
|
||||
'timestamp': int_or_none(result.get('added'), 1000),
|
||||
})
|
||||
return info
|
||||
return self.url_result(
|
||||
'http://www.history.com/videos/' + display_id,
|
||||
AENetworksIE.ie_key())
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
@@ -11,25 +13,22 @@ from ..utils import (
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
||||
'md5': '',
|
||||
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
|
||||
'info_dict': {
|
||||
'id': 's3MX01Nl4vPH',
|
||||
'id': '4Lq1dzOnZGt0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maron - Season 4 - Step 1',
|
||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
||||
'age_limit': 17,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462468831,
|
||||
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
|
||||
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
|
||||
'upload_date': '20201120',
|
||||
'timestamp': 1605904350,
|
||||
'uploader': 'AMCN',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires TV provider accounts',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
@@ -55,32 +54,33 @@ class AMCNetworksIE(ThePlatformIE):
|
||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_REQUESTOR_ID_MAP = {
|
||||
'amc': 'AMC',
|
||||
'bbcamerica': 'BBCA',
|
||||
'ifc': 'IFC',
|
||||
'sundancetv': 'SUNDANCE',
|
||||
'wetv': 'WETV',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
properties = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
|
||||
display_id)['data']['properties']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
media_url = self._search_regex(
|
||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'media url')
|
||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||
r'link\.theplatform\.com/s/([^?]+)',
|
||||
media_url, 'theplatform_path'), display_id)
|
||||
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
auth_required = self._search_regex(
|
||||
r'window\.authRequired\s*=\s*(true|false);',
|
||||
webpage, 'auth required')
|
||||
if auth_required == 'true':
|
||||
requestor_id = self._search_regex(
|
||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
||||
webpage, 'requestor id')
|
||||
if properties.get('videoCategory') == 'TVE-Auth':
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
|
@@ -1,33 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
'info_dict': {
|
||||
'id': '5b400b9ee338f922cb06450c',
|
||||
'title': 'Weeknight Japanese Suppers',
|
||||
'title': 'Japanese Suppers',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
||||
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'release_date': '20180414',
|
||||
'release_date': '20180410',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 18,
|
||||
'episode': 'Weeknight Japanese Suppers',
|
||||
'episode': 'Japanese Suppers',
|
||||
'episode_number': 15,
|
||||
},
|
||||
'params': {
|
||||
@@ -36,47 +36,31 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
resource_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
is_episode = resource_type == 'episode'
|
||||
if is_episode:
|
||||
resource_type = 'episodes'
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id, js_to_json)
|
||||
|
||||
ep_data = try_get(
|
||||
video_data,
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
|
||||
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
'description') or ep_meta.get('description'))
|
||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
||||
|
||||
season_number = int_or_none(ep_meta.get('season_number'))
|
||||
episode = ep_meta.get('title')
|
||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
||||
resource = self._download_json(
|
||||
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||
video = resource['video'] if is_episode else resource
|
||||
episode = resource if is_episode else resource.get('episode') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'ie_key': 'Zype',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'release_date': release_date,
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'description': clean_html(video.get('description')),
|
||||
'release_date': unified_strdate(video.get('publishDate')),
|
||||
'series': try_get(episode, lambda x: x['show']['title']),
|
||||
'episode': episode.get('title'),
|
||||
}
|
||||
|
@@ -1,194 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BeamProBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://mixer.com/api/v1'
|
||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
||||
|
||||
def _extract_channel_info(self, chan):
|
||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
||||
return {
|
||||
'uploader': chan.get('token') or try_get(
|
||||
chan, lambda x: x['user']['username'], compat_str),
|
||||
'uploader_id': compat_str(user_id) if user_id else None,
|
||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
||||
}
|
||||
|
||||
|
||||
class BeamProLiveIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:live'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'http://mixer.com/niterhayven',
|
||||
'info_dict': {
|
||||
'id': '261562',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
|
||||
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
|
||||
'thumbnail': r're:https://.*\.jpg$',
|
||||
'timestamp': 1483477281,
|
||||
'upload_date': '20170103',
|
||||
'uploader': 'niterhayven',
|
||||
'uploader_id': '373396',
|
||||
'age_limit': 18,
|
||||
'is_live': True,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'niterhayven is offline',
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url)
|
||||
|
||||
chan = self._download_json(
|
||||
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
||||
|
||||
if chan.get('online') is False:
|
||||
raise ExtractorError(
|
||||
'{0} is offline'.format(channel_name), expected=True)
|
||||
|
||||
channel_id = chan['id']
|
||||
|
||||
def manifest_url(kind):
|
||||
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
||||
fatal=False)
|
||||
formats.extend(self._extract_smil_formats(
|
||||
manifest_url('smil'), channel_name, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': compat_str(chan.get('id') or channel_name),
|
||||
'title': self._live_title(chan.get('name') or channel_name),
|
||||
'description': clean_html(chan.get('description')),
|
||||
'thumbnail': try_get(
|
||||
chan, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
||||
'is_live': True,
|
||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(chan))
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class BeamProVodIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:vod'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
||||
'info_dict': {
|
||||
'id': '2259830',
|
||||
'ext': 'mp4',
|
||||
'title': 'willow8714\'s Channel',
|
||||
'duration': 6828.15,
|
||||
'thumbnail': r're:https://.*source\.png$',
|
||||
'timestamp': 1494046474,
|
||||
'upload_date': '20170506',
|
||||
'uploader': 'willow8714',
|
||||
'uploader_id': '6085379',
|
||||
'age_limit': 13,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_format(vod, vod_type):
|
||||
if not vod.get('baseUrl'):
|
||||
return []
|
||||
|
||||
if vod_type == 'hls':
|
||||
filename, protocol = 'manifest.m3u8', 'm3u8_native'
|
||||
elif vod_type == 'raw':
|
||||
filename, protocol = 'source.mp4', 'https'
|
||||
else:
|
||||
assert False
|
||||
|
||||
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
|
||||
|
||||
format_id = [vod_type]
|
||||
if isinstance(data.get('Height'), compat_str):
|
||||
format_id.append('%sp' % data['Height'])
|
||||
|
||||
return [{
|
||||
'url': urljoin(vod['baseUrl'], filename),
|
||||
'format_id': '-'.join(format_id),
|
||||
'ext': 'mp4',
|
||||
'protocol': protocol,
|
||||
'width': int_or_none(data.get('Width')),
|
||||
'height': int_or_none(data.get('Height')),
|
||||
'fps': int_or_none(data.get('Fps')),
|
||||
'tbr': int_or_none(data.get('Bitrate'), 1000),
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
vod_id = self._match_id(url)
|
||||
|
||||
vod_info = self._download_json(
|
||||
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
|
||||
|
||||
state = vod_info.get('state')
|
||||
if state != 'AVAILABLE':
|
||||
raise ExtractorError(
|
||||
'VOD %s is not available (state: %s)' % (vod_id, state),
|
||||
expected=True)
|
||||
|
||||
formats = []
|
||||
thumbnail_url = None
|
||||
|
||||
for vod in vod_info['vods']:
|
||||
vod_type = vod.get('format')
|
||||
if vod_type in ('hls', 'raw'):
|
||||
formats.extend(self._extract_format(vod, vod_type))
|
||||
elif vod_type == 'thumbnail':
|
||||
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': vod_id,
|
||||
'title': vod_info.get('name') or vod_id,
|
||||
'duration': float_or_none(vod_info.get('duration')),
|
||||
'thumbnail': thumbnail_url,
|
||||
'timestamp': parse_iso8601(vod_info.get('createdAt')),
|
||||
'view_count': int_or_none(vod_info.get('viewsTotal')),
|
||||
'formats': formats,
|
||||
}
|
||||
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
|
||||
|
||||
return info
|
@@ -336,8 +336,8 @@ class InfoExtractor(object):
|
||||
object, each element of which is a valid dictionary by this specification.
|
||||
|
||||
Additionally, playlists can have "id", "title", "description", "uploader",
|
||||
"uploader_id", "uploader_url" attributes with the same semantics as videos
|
||||
(see above).
|
||||
"uploader_id", "uploader_url", "duration" attributes with the same semantics
|
||||
as videos (see above).
|
||||
|
||||
|
||||
_type "multi_video" indicates that there are multiple videos that
|
||||
@@ -1237,8 +1237,16 @@ class InfoExtractor(object):
|
||||
'ViewAction': 'view',
|
||||
}
|
||||
|
||||
def extract_interaction_type(e):
|
||||
interaction_type = e.get('interactionType')
|
||||
if isinstance(interaction_type, dict):
|
||||
interaction_type = interaction_type.get('@type')
|
||||
return str_or_none(interaction_type)
|
||||
|
||||
def extract_interaction_statistic(e):
|
||||
interaction_statistic = e.get('interactionStatistic')
|
||||
if isinstance(interaction_statistic, dict):
|
||||
interaction_statistic = [interaction_statistic]
|
||||
if not isinstance(interaction_statistic, list):
|
||||
return
|
||||
for is_e in interaction_statistic:
|
||||
@@ -1246,8 +1254,8 @@ class InfoExtractor(object):
|
||||
continue
|
||||
if is_e.get('@type') != 'InteractionCounter':
|
||||
continue
|
||||
interaction_type = is_e.get('interactionType')
|
||||
if not isinstance(interaction_type, compat_str):
|
||||
interaction_type = extract_interaction_type(is_e)
|
||||
if not interaction_type:
|
||||
continue
|
||||
# For interaction count some sites provide string instead of
|
||||
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||
@@ -2513,16 +2521,18 @@ class InfoExtractor(object):
|
||||
# amp-video and amp-audio are very similar to their HTML5 counterparts
|
||||
# so we will include them right here (see
|
||||
# https://www.ampproject.org/docs/reference/components/amp-video)
|
||||
media_tags = [(media_tag, media_type, '')
|
||||
for media_tag, media_type
|
||||
in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
|
||||
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
|
||||
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
|
||||
media_tags = [(media_tag, media_tag_name, media_type, '')
|
||||
for media_tag, media_tag_name, media_type
|
||||
in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
|
||||
media_tags.extend(re.findall(
|
||||
# We only allow video|audio followed by a whitespace or '>'.
|
||||
# Allowing more characters may end up in significant slow down (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
|
||||
# http://www.porntrex.com/maps/videositemap.xml).
|
||||
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||
for media_tag, media_type, media_content in media_tags:
|
||||
r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage))
|
||||
for media_tag, _, media_type, media_content in media_tags:
|
||||
media_info = {
|
||||
'formats': [],
|
||||
'subtitles': {},
|
||||
@@ -2614,33 +2624,32 @@ class InfoExtractor(object):
|
||||
hls_host = hosts.get('hls')
|
||||
if hls_host:
|
||||
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and 'hdnea=' not in manifest_url:
|
||||
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
|
||||
if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
|
||||
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
|
||||
if len(m3u8_formats) in (qualities_length, qualities_length + 1):
|
||||
i = 0
|
||||
http_formats = []
|
||||
for f in formats:
|
||||
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
|
||||
for f in m3u8_formats:
|
||||
if f['vcodec'] != 'none':
|
||||
for protocol in ('http', 'https'):
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_url = re.sub(
|
||||
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
|
||||
REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||
'url': http_url,
|
||||
'protocol': protocol,
|
||||
})
|
||||
http_formats.append(http_f)
|
||||
formats.append(http_f)
|
||||
i += 1
|
||||
formats.extend(http_formats)
|
||||
|
||||
return formats
|
||||
|
||||
|
@@ -10,6 +10,8 @@ from ..utils import (
|
||||
find_xpath_attr,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
)
|
||||
@@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
|
||||
bc_attr['data-bcid'])
|
||||
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
||||
|
||||
def add_referer(formats):
    """Set the ``Referer`` HTTP header of every format to the page URL.

    Each format dict is modified in place; an ``http_headers`` dict is
    created on formats that do not have one yet. ``url`` is taken from the
    enclosing scope.
    """
    for fmt in formats:
        headers = fmt.setdefault('http_headers', {})
        headers['Referer'] = url
|
||||
|
||||
# As of 01.12.2020 this path looks to cover all cases making the rest
|
||||
# of the code unnecessary
|
||||
jwsetup = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
|
||||
default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if jwsetup:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||
base_url=url)
|
||||
add_referer(info['formats'])
|
||||
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||
return merge_dicts(info, ld_info)
|
||||
|
||||
# Obsolete
|
||||
# We first look for clipid, because clipprog always appears before
|
||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||
@@ -165,6 +187,7 @@ class CSpanIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||
add_referer(formats)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
|
@@ -29,7 +29,7 @@ class DRTVIE(InfoExtractor):
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
|
||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||
)
|
||||
(?P<id>[\da-z_-]+)
|
||||
'''
|
||||
@@ -111,6 +111,9 @@ class DRTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -16,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EpornerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||
@@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor):
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||
'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor):
|
||||
video_id = self._match_id(urlh.geturl())
|
||||
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||
@@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor):
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
|
||||
webpage, 'view count', fatal=False))
|
||||
r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
|
||||
webpage, 'view count', default=None))
|
||||
|
||||
return merge_dicts(json_ld, {
|
||||
'id': video_id,
|
||||
|
@@ -30,6 +30,8 @@ from .adobetv import (
|
||||
from .adultswim import AdultSwimIE
|
||||
from .aenetworks import (
|
||||
AENetworksIE,
|
||||
AENetworksCollectionIE,
|
||||
AENetworksShowIE,
|
||||
HistoryTopicIE,
|
||||
)
|
||||
from .afreecatv import AfreecaTVIE
|
||||
@@ -89,10 +91,6 @@ from .bbc import (
|
||||
BBCCoUkPlaylistIE,
|
||||
BBCIE,
|
||||
)
|
||||
from .beampro import (
|
||||
BeamProLiveIE,
|
||||
BeamProVodIE,
|
||||
)
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
@@ -393,6 +391,7 @@ from .frontendmasters import (
|
||||
FrontendMastersLessonIE,
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .fujitv import FujiTVFODPlus7IE
|
||||
from .funimation import FunimationIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
@@ -531,7 +530,10 @@ from .laola1tv import (
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lbry import LBRYIE
|
||||
from .lbry import (
|
||||
LBRYIE,
|
||||
LBRYChannelIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
@@ -1039,12 +1041,6 @@ from .sky import (
|
||||
from .slideshare import SlideshareIE
|
||||
from .slideslive import SlidesLiveIE
|
||||
from .slutload import SlutloadIE
|
||||
from .smotri import (
|
||||
SmotriIE,
|
||||
SmotriCommunityIE,
|
||||
SmotriUserIE,
|
||||
SmotriBroadcastIE,
|
||||
)
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sonyliv import SonyLIVIE
|
||||
@@ -1082,6 +1078,12 @@ from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
SpreakerShowIE,
|
||||
SpreakerShowPageIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .srgssr import (
|
||||
@@ -1143,6 +1145,7 @@ from .telequebec import (
|
||||
TeleQuebecSquatIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
TeleQuebecVideoIE,
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
@@ -1179,7 +1182,10 @@ from .tnaflix import (
|
||||
EMPFlixIE,
|
||||
MovieFapIE,
|
||||
)
|
||||
from .toggle import ToggleIE
|
||||
from .toggle import (
|
||||
ToggleIE,
|
||||
MeWatchIE,
|
||||
)
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutv import TouTvIE
|
||||
@@ -1212,7 +1218,10 @@ from .tv2dk import (
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
from .tva import (
|
||||
TVAIE,
|
||||
QubIE,
|
||||
)
|
||||
from .tvanouvelles import (
|
||||
TVANouvellesIE,
|
||||
TVANouvellesArticleIE,
|
||||
@@ -1221,6 +1230,7 @@ from .tvc import (
|
||||
TVCIE,
|
||||
TVCArticleIE,
|
||||
)
|
||||
from .tver import TVerIE
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvn24 import TVN24IE
|
||||
@@ -1375,6 +1385,7 @@ from .vk import (
|
||||
)
|
||||
from .vlive import (
|
||||
VLiveIE,
|
||||
VLivePostIE,
|
||||
VLiveChannelIE,
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
@@ -1471,6 +1482,8 @@ from .yandexmusic import (
|
||||
YandexMusicTrackIE,
|
||||
YandexMusicAlbumIE,
|
||||
YandexMusicPlaylistIE,
|
||||
YandexMusicArtistTracksIE,
|
||||
YandexMusicArtistAlbumsIE,
|
||||
)
|
||||
from .yandexvideo import YandexVideoIE
|
||||
from .yapfiles import YapFilesIE
|
||||
@@ -1503,6 +1516,7 @@ from .youtube import (
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
|
@@ -16,14 +16,17 @@ from ..utils import (
|
||||
clean_html,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
parse_count,
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -39,7 +42,8 @@ class FacebookIE(InfoExtractor):
|
||||
photo\.php|
|
||||
video\.php|
|
||||
video/embed|
|
||||
story\.php
|
||||
story\.php|
|
||||
watch(?:/live)?/?
|
||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
@@ -54,8 +58,6 @@ class FacebookIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'facebook'
|
||||
IE_NAME = 'facebook'
|
||||
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
@@ -72,6 +74,7 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.video
|
||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
@@ -133,6 +136,7 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||
'info_dict': {
|
||||
@@ -147,6 +151,7 @@ class FacebookIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
@@ -174,6 +179,7 @@ class FacebookIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
@@ -193,18 +199,23 @@ class FacebookIE(InfoExtractor):
|
||||
'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.mediaset.currMedia.edges
|
||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'facebook:544765982287235',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video
|
||||
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
@@ -212,6 +223,7 @@ class FacebookIE(InfoExtractor):
|
||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.video
|
||||
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
||||
'info_dict': {
|
||||
'id': '359649331226507',
|
||||
@@ -222,7 +234,54 @@ class FacebookIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
|
||||
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
|
||||
'info_dict': {
|
||||
'id': '106560053808006',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# data.video.story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/?v=647537299265662',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
|
||||
'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
|
||||
'info_dict': {
|
||||
'id': '10157667649866271',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
|
||||
'info_dict': {
|
||||
'id': '117576630041613',
|
||||
'ext': 'mp4',
|
||||
# TODO: title can be extracted from video page
|
||||
'title': 'Facebook video #117576630041613',
|
||||
'uploader_id': '189393014416438',
|
||||
'upload_date': '20201123',
|
||||
'timestamp': 1606162592,
|
||||
},
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
|
||||
'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
|
||||
'info_dict': {
|
||||
'id': '211567722618337',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook video #211567722618337',
|
||||
'uploader_id': '127875227654254',
|
||||
'upload_date': '20161122',
|
||||
'timestamp': 1479793574,
|
||||
},
|
||||
}, {
|
||||
# data.video.creation_story.attachments[].media
|
||||
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@@ -305,23 +364,24 @@ class FacebookIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
def _extract_from_url(self, url, video_id):
|
||||
webpage = self._download_webpage(
|
||||
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
|
||||
|
||||
video_data = None
|
||||
|
||||
def extract_video_data(instances):
|
||||
video_data = []
|
||||
for item in instances:
|
||||
if item[1][0] == 'VideoConfig':
|
||||
if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
|
||||
video_item = item[2][0]
|
||||
if video_item.get('video_id'):
|
||||
return video_item['videoData']
|
||||
video_data.append(video_item['videoData'])
|
||||
return video_data
|
||||
|
||||
server_js_data = self._parse_json(self._search_regex(
|
||||
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
||||
'server js data', default='{}'), video_id, fatal=False)
|
||||
[r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
|
||||
webpage, 'server js data', default='{}'), video_id, fatal=False)
|
||||
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||
@@ -331,17 +391,111 @@ class FacebookIE(InfoExtractor):
|
||||
return extract_video_data(try_get(
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
    """Append the formats described by ``video['dash_manifest']`` to *formats*.

    Does nothing when *video* carries no (or an empty) ``dash_manifest``.
    Otherwise the value is URL-unquoted, parsed as XML and handed to
    ``self._parse_mpd_formats``; *formats* is extended in place.
    """
    manifest = video.get('dash_manifest')
    if not manifest:
        return
    # The manifest is embedded as a URL-quoted XML string, hence the
    # unquote step before parsing.
    mpd_doc = compat_etree_fromstring(
        compat_urllib_parse_unquote_plus(manifest))
    formats.extend(self._parse_mpd_formats(mpd_doc))
|
||||
|
||||
def process_formats(formats):
    """Force a non-browser User-Agent on every format, then sort *formats*.

    Both steps operate on *formats* in place.
    """
    # Downloads with browser's User-Agent are rate limited. Working around
    # with non-browser User-Agent.
    for fmt in formats:
        headers = fmt.setdefault('http_headers', {})
        headers['User-Agent'] = 'facebookexternalhit/1.1'

    self._sort_formats(formats)
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
|
||||
webpage, 'js data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||
r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
|
||||
], webpage, 'js data', default='{}'), video_id, js_to_json, False)
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
return webpage, False
|
||||
graphql_data = self._parse_json(self._search_regex(
|
||||
r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
|
||||
webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
|
||||
for require in (graphql_data.get('require') or []):
|
||||
if require[0] == 'RelayPrefetchedStreamCache':
|
||||
entries = []
|
||||
|
||||
def parse_graphql_video(video):
    """Build an info dict from one GraphQL video object and append it to ``entries``.

    *video* is read as a mapping. Progressive URLs come from
    ``playable_url`` (sd) and ``playable_url_quality_hd`` (hd); further
    formats are added by ``extract_dash_manifest``, and the combined list is
    finalized by ``process_formats``. ``entries`` and the fallback
    ``video_id`` are taken from the enclosing scope. Returns nothing.
    """
    rank = qualities(['sd', 'hd'])
    # One progressive format per quality whose URL is actually present.
    formats = [{
        'format_id': format_id,
        'quality': rank(format_id),
        'url': video.get('playable_url' + suffix),
    } for suffix, format_id in (('', 'sd'), ('_quality_hd', 'hd'))
        if video.get('playable_url' + suffix)]
    extract_dash_manifest(video, formats)
    process_formats(formats)

    # Prefer the video's own identifier, falling back to the id of the page
    # being extracted.
    v_id = video.get('videoId') or video.get('id') or video_id
    title = video.get('name')
    description = try_get(video, lambda x: x['savable_description']['text'])

    info = {
        'id': v_id,
        'formats': formats,
        'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
        'uploader_id': try_get(video, lambda x: x['owner']['id']),
        'timestamp': int_or_none(video.get('publish_time')),
        'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
    }
    if title:
        info['title'] = title
        info['description'] = description
    else:
        # Without a name the description doubles as the title, and a
        # generic placeholder is used when that is missing as well.
        info['title'] = description or 'Facebook video #%s' % v_id
    entries.append(info)
|
||||
|
||||
def parse_attachment(attachment, key='media'):
    """Extract the video stored under *key* of *attachment*, if any.

    The media object is forwarded to ``parse_graphql_video`` only when its
    ``__typename`` is ``'Video'``; everything else is ignored.

    *attachment* is tolerated in any shape: callers pass the result of a
    ``try_get(..., dict)`` lookup (which may be ``None``) as well as raw
    list items, so anything that is not a dict is skipped instead of
    raising ``AttributeError``. The same applies to a non-dict media value.

    Returns whatever ``parse_graphql_video`` returns, or ``None`` when
    there is nothing to parse.
    """
    if not isinstance(attachment, dict):
        return None
    media = attachment.get(key)
    if isinstance(media, dict) and media.get('__typename') == 'Video':
        return parse_graphql_video(media)
    return None
|
||||
|
||||
data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||
|
||||
nodes = data.get('nodes') or []
|
||||
node = data.get('node') or {}
|
||||
if not nodes and node:
|
||||
nodes.append(node)
|
||||
for node in nodes:
|
||||
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||
attachments = try_get(story, [
|
||||
lambda x: x['attached_story']['attachments'],
|
||||
lambda x: x['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||
for n in ns:
|
||||
parse_attachment(n)
|
||||
parse_attachment(attachment)
|
||||
|
||||
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
video = data.get('video') or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
lambda x: x['creation_story']['attachments']
|
||||
], list) or []
|
||||
for attachment in attachments:
|
||||
parse_attachment(attachment)
|
||||
if not entries:
|
||||
parse_graphql_video(video)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
if not video_data:
|
||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||
if m_msg is not None:
|
||||
raise ExtractorError(
|
||||
@@ -379,8 +533,19 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_data:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
subtitles = {}
|
||||
if len(video_data) > 1:
|
||||
entries = []
|
||||
for v in video_data:
|
||||
video_url = v[0].get('video_url')
|
||||
if not video_url:
|
||||
continue
|
||||
entries.append(self.url_result(urljoin(
|
||||
url, video_url), self.ie_key(), v[0].get('video_id')))
|
||||
return self.playlist_result(entries, video_id)
|
||||
video_data = video_data[0]
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for f in video_data:
|
||||
format_id = f['stream_type']
|
||||
if f and isinstance(f, dict):
|
||||
@@ -399,22 +564,14 @@ class FacebookIE(InfoExtractor):
|
||||
'url': src,
|
||||
'preference': preference,
|
||||
})
|
||||
dash_manifest = f[0].get('dash_manifest')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
extract_dash_manifest(f[0], formats)
|
||||
subtitles_src = f[0].get('subtitles_src')
|
||||
if subtitles_src:
|
||||
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
|
||||
self._sort_formats(formats)
|
||||
process_formats(formats)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||
@@ -454,35 +611,13 @@ class FacebookIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
return info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
|
||||
webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
|
||||
|
||||
if info_dict:
|
||||
return info_dict
|
||||
|
||||
if '/posts/' in url:
|
||||
video_id_json = self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
|
||||
default='')
|
||||
if video_id_json:
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||
for vid in self._parse_json(video_id_json, video_id)]
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
# Single Video?
|
||||
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
|
||||
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||
else:
|
||||
_, info_dict = self._extract_from_url(
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
video_id, fatal_if_no_video=True)
|
||||
return info_dict
|
||||
return self._extract_from_url(real_url, video_id)
|
||||
|
||||
|
||||
class FacebookPluginsVideoIE(InfoExtractor):
|
||||
|
35
youtube_dl/extractor/fujitv.py
Normal file
35
youtube_dl/extractor/fujitv.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FujiTVFODPlus7IE(InfoExtractor):
    _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
    # Prefix shared by the manifest and thumbnail URLs built below.
    _BASE_URL = 'http://i.fod.fujitv.co.jp/'
    # Format 'tbr' value -> (width, height) applied to that format.
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
        1200: (1280, 720),
        2000: (1280, 720),
    }

    def _real_extract(self, url):
        # The id captured from the URL is all that is needed: both the
        # manifest and the thumbnail locations are derived from it.
        video_id = self._match_id(url)

        manifest_url = self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id
        formats = self._extract_m3u8_formats(manifest_url, video_id)
        for fmt in formats:
            dimensions = self._BITRATE_MAP.get(fmt.get('tbr'))
            if not dimensions:
                continue
            width, height = dimensions
            fmt.update({
                'width': width,
                'height': height,
            })
        self._sort_formats(formats)

        thumbnail = self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'thumbnail': thumbnail,
        }
|
@@ -1,16 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .once import OnceIE
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
url_basename,
|
||||
dict_get,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
|
||||
|
||||
class GameSpotIE(OnceIE):
|
||||
@@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
|
||||
'title': 'Arma 3 - Community Guide: SITREP I',
|
||||
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
||||
},
|
||||
'skip': 'manifest URL give HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||
'md5': '173ea87ad762cf5d3bf6163dceb255a6',
|
||||
'info_dict': {
|
||||
'id': 'gs-2300-6424837',
|
||||
'ext': 'mp4',
|
||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 downloads
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.gamespot.com/videos/embed/6439218/',
|
||||
'only_matching': True,
|
||||
@@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
data_video_json = self._search_regex(
|
||||
r'data-video=["\'](.*?)["\']', webpage, 'data video')
|
||||
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
|
||||
data_video = self._parse_json(self._html_search_regex(
|
||||
r'data-video=(["\'])({.*?})\1', webpage,
|
||||
'video data', group=2), page_id)
|
||||
title = compat_urllib_parse_unquote(data_video['title'])
|
||||
streams = data_video['videoStreams']
|
||||
|
||||
manifest_url = None
|
||||
formats = []
|
||||
f4m_url = streams.get('f4m_stream')
|
||||
if f4m_url:
|
||||
manifest_url = f4m_url
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
|
||||
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
|
||||
|
||||
m3u8_url = streams.get('adaptive_stream')
|
||||
if m3u8_url:
|
||||
manifest_url = m3u8_url
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
progressive_url = dict_get(
|
||||
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
|
||||
if progressive_url and manifest_url:
|
||||
qualities_basename = self._search_regex(
|
||||
r'/([^/]+)\.csmil/',
|
||||
manifest_url, 'qualities basename', default=None)
|
||||
if qualities_basename:
|
||||
QUALITIES_RE = r'((,\d+)+,?)'
|
||||
qualities = self._search_regex(
|
||||
QUALITIES_RE, qualities_basename,
|
||||
'qualities', default=None)
|
||||
if qualities:
|
||||
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
|
||||
qualities.sort()
|
||||
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
|
||||
http_url_basename = url_basename(progressive_url)
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
f.update({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'format_id': f['format_id'].replace('hls', 'http'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
formats.append(f)
|
||||
else:
|
||||
for q in qualities:
|
||||
formats.append({
|
||||
'url': progressive_url.replace(
|
||||
http_url_basename, http_template % q),
|
||||
'ext': 'mp4',
|
||||
'format_id': 'http-%d' % q,
|
||||
'tbr': q,
|
||||
})
|
||||
for f in m3u8_formats:
|
||||
formats.append(f)
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_f.update({
|
||||
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||
'protocol': 'http',
|
||||
'url': f['url'].replace('.m3u8', '.mp4'),
|
||||
})
|
||||
formats.append(http_f)
|
||||
|
||||
onceux_json = self._search_regex(
|
||||
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
|
||||
if onceux_json:
|
||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
||||
if onceux_url:
|
||||
formats.extend(self._extract_once_formats(re.sub(
|
||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
|
||||
http_formats_preference=-1))
|
||||
mpd_url = streams.get('adaptive_dash')
|
||||
if mpd_url:
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
mpd_url, page_id, mpd_id='dash', fatal=False))
|
||||
|
||||
if not formats:
|
||||
for quality in ['sd', 'hd']:
|
||||
# It's actually a link to a flv file
|
||||
flv_url = streams.get('f4m_{0}'.format(quality))
|
||||
if flv_url is not None:
|
||||
formats.append({
|
||||
'url': flv_url,
|
||||
'ext': 'flv',
|
||||
'format_id': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': data_video['guid'],
|
||||
'id': data_video.get('guid') or page_id,
|
||||
'display_id': page_id,
|
||||
'title': compat_urllib_parse_unquote(data_video['title']),
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
|
@@ -20,19 +20,23 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
is_html,
|
||||
js_to_json,
|
||||
KNOWN_EXTENSIONS,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
from .brightcove import (
|
||||
@@ -48,7 +52,6 @@ from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
@@ -198,11 +201,21 @@ class GenericIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
'info_dict': {
|
||||
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
'ext': 'm4v',
|
||||
'upload_date': '20150228',
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
'title': 'MSNBC Rachel Maddow (video)',
|
||||
'description': 're:.*her unique approach to storytelling.*',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'ext': 'mov',
|
||||
'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
|
||||
'title': 're:MSNBC Rachel Maddow',
|
||||
'description': 're:.*her unique approach to storytelling.*',
|
||||
'timestamp': int,
|
||||
'upload_date': compat_str,
|
||||
'duration': float,
|
||||
},
|
||||
}],
|
||||
},
|
||||
# RSS feed with enclosures and unsupported link URLs
|
||||
{
|
||||
@@ -2103,23 +2116,23 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||
'upload_date': '20170909',
|
||||
'timestamp': 1504915200,
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# {
|
||||
# # Zype embed
|
||||
# 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
# 'info_dict': {
|
||||
# 'id': '5b400b834b32992a310622b9',
|
||||
# 'ext': 'mp4',
|
||||
# 'title': 'Smoky Barbecue Favorites',
|
||||
# 'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
# 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||
# 'upload_date': '20170909',
|
||||
# 'timestamp': 1504915200,
|
||||
# },
|
||||
# 'add_ie': [ZypeIE.ie_key()],
|
||||
# 'params': {
|
||||
# 'skip_download': True,
|
||||
# },
|
||||
# },
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
@@ -2180,6 +2193,10 @@ class GenericIE(InfoExtractor):
|
||||
playlist_desc_el = doc.find('./channel/description')
|
||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||
|
||||
NS_MAP = {
|
||||
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||
}
|
||||
|
||||
entries = []
|
||||
for it in doc.findall('./channel/item'):
|
||||
next_url = None
|
||||
@@ -2195,10 +2212,33 @@ class GenericIE(InfoExtractor):
|
||||
if not next_url:
|
||||
continue
|
||||
|
||||
def itunes(key):
|
||||
return xpath_text(
|
||||
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
|
||||
default=None)
|
||||
|
||||
duration = itunes('duration')
|
||||
explicit = itunes('explicit')
|
||||
if explicit == 'true':
|
||||
age_limit = 18
|
||||
elif explicit == 'false':
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = None
|
||||
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': next_url,
|
||||
'title': it.find('title').text,
|
||||
'description': xpath_text(it, 'description', default=None),
|
||||
'timestamp': unified_timestamp(
|
||||
xpath_text(it, 'pubDate', default=None)),
|
||||
'duration': int_or_none(duration) or parse_duration(duration),
|
||||
'thumbnail': url_or_none(itunes('image')),
|
||||
'episode': itunes('title'),
|
||||
'episode_number': int_or_none(itunes('episode')),
|
||||
'season_number': int_or_none(itunes('season')),
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -2318,7 +2358,7 @@ class GenericIE(InfoExtractor):
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': self._generic_title(url),
|
||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
||||
'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
|
||||
}
|
||||
|
||||
# Check for direct link to a video
|
||||
@@ -2424,7 +2464,9 @@ class GenericIE(InfoExtractor):
|
||||
# Sometimes embedded video player is hidden behind percent encoding
|
||||
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
webpage = compat_urllib_parse_unquote(webpage)
|
||||
# FIXME: unescaping the whole page may break URLs, commenting out for now.
|
||||
# There probably should be a second run of generic extractor on unescaped webpage.
|
||||
# webpage = compat_urllib_parse_unquote(webpage)
|
||||
|
||||
# Unescape squarespace embeds to be detected by generic extractor,
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||
@@ -2772,11 +2814,6 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded smotri.com player
|
||||
smotri_url = SmotriIE._extract_url(webpage)
|
||||
if smotri_url:
|
||||
return self.url_result(smotri_url, 'Smotri')
|
||||
|
||||
# Look for embedded Myvi.ru player
|
||||
myvi_url = MyviIE._extract_url(webpage)
|
||||
if myvi_url:
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
@@ -25,43 +26,50 @@ from ..utils import (
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
def _call_api_impl(self, path, video_id, query):
|
||||
def _call_api_impl(self, path, video_id, headers, query, data=None):
|
||||
st = int(time.time())
|
||||
exp = st + 6000
|
||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
response = self._download_json(
|
||||
'https://api.hotstar.com/' + path, video_id, headers={
|
||||
'hotstarauth': auth,
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'JIO',
|
||||
}, query=query)
|
||||
h = {'hotstarauth': auth}
|
||||
h.update(headers)
|
||||
return self._download_json(
|
||||
'https://api.hotstar.com/' + path,
|
||||
video_id, headers=h, query=query, data=data)
|
||||
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
response = self._call_api_impl(path, video_id, {
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'JIO',
|
||||
}, {
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
})
|
||||
if response['statusCode'] != 'OK':
|
||||
raise ExtractorError(
|
||||
response['body']['message'], expected=True)
|
||||
return response['body']['results']
|
||||
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
return self._call_api_impl(path, video_id, {
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
})
|
||||
|
||||
def _call_api_v2(self, path, video_id):
|
||||
return self._call_api_impl(
|
||||
'%s/in/contents/%s' % (path, video_id), video_id, {
|
||||
'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
|
||||
'client': 'mweb',
|
||||
'clientVersion': '6.18.0',
|
||||
'deviceId': compat_str(uuid.uuid4()),
|
||||
'osName': 'Windows',
|
||||
'osVersion': '10',
|
||||
})
|
||||
def _call_api_v2(self, path, video_id, headers, query=None, data=None):
|
||||
h = {'X-Request-Id': compat_str(uuid.uuid4())}
|
||||
h.update(headers)
|
||||
try:
|
||||
return self._call_api_impl(
|
||||
path, video_id, h, query, data)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError):
|
||||
if e.cause.code == 402:
|
||||
self.raise_login_required()
|
||||
message = self._parse_json(e.cause.read().decode(), video_id)['message']
|
||||
if message in ('Content not available in region', 'Country is not supported'):
|
||||
raise self.raise_geo_restricted(message)
|
||||
raise ExtractorError(message)
|
||||
raise e
|
||||
|
||||
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
# contentData
|
||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||
@@ -92,8 +100,13 @@ class HotStarIE(HotStarBaseIE):
|
||||
# only available via api v2
|
||||
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_DEVICE_ID = None
|
||||
_USER_TOKEN = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -121,7 +134,30 @@ class HotStarIE(HotStarBaseIE):
|
||||
headers = {'Referer': url}
|
||||
formats = []
|
||||
geo_restricted = False
|
||||
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
||||
|
||||
if not self._USER_TOKEN:
|
||||
self._DEVICE_ID = compat_str(uuid.uuid4())
|
||||
self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
|
||||
'X-HS-Platform': 'PCTV',
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'device_ids': [{
|
||||
'id': self._DEVICE_ID,
|
||||
'type': 'device_id',
|
||||
}],
|
||||
}).encode())['user_identity']
|
||||
|
||||
playback_sets = self._call_api_v2(
|
||||
'play/v2/playback/content/' + video_id, video_id, {
|
||||
'X-HS-Platform': 'web',
|
||||
'X-HS-AppVersion': '6.99.1',
|
||||
'X-HS-UserToken': self._USER_TOKEN,
|
||||
}, query={
|
||||
'device-id': self._DEVICE_ID,
|
||||
'desired-config': 'encryption:plain',
|
||||
'os-name': 'Windows',
|
||||
'os-version': '10',
|
||||
})['data']['playBackSets']
|
||||
for playback_set in playback_sets:
|
||||
if not isinstance(playback_set, dict):
|
||||
continue
|
||||
@@ -163,19 +199,22 @@ class HotStarIE(HotStarBaseIE):
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
image = try_get(video_data, lambda x: x['image']['h'], compat_str)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||
'formats': formats,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'channel_id': str_or_none(video_data.get('channelId')),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': video_data.get('seasonId'),
|
||||
'season_id': str_or_none(video_data.get('seasonId')),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
@@ -183,7 +222,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
|
||||
class HotStarPlaylistIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||
'info_dict': {
|
||||
@@ -193,6 +232,9 @@ class HotStarPlaylistIE(HotStarBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||
@@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://m.ina.fr/video/I12055569',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,29 +1,21 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_etree_register_namespace,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
get_element_by_class,
|
||||
JSON_LD_RE,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
url_or_none,
|
||||
xpath_with_ns,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
@@ -31,14 +23,18 @@ class ITVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||
_GEO_COUNTRIES = ['GB']
|
||||
_TESTS = [{
|
||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
||||
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||
'info_dict': {
|
||||
'id': '2a2936a0053',
|
||||
'ext': 'flv',
|
||||
'title': 'Home Movie',
|
||||
'id': '2a4547a0012',
|
||||
'ext': 'mp4',
|
||||
'title': 'Liar - Series 2 - Episode 6',
|
||||
'description': 'md5:d0f91536569dec79ea184f0a44cca089',
|
||||
'series': 'Liar',
|
||||
'season_number': 2,
|
||||
'episode_number': 6,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
@@ -61,220 +57,97 @@ class ITVIE(InfoExtractor):
|
||||
params = extract_attributes(self._search_regex(
|
||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
||||
|
||||
ns_map = {
|
||||
'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
|
||||
'tem': 'http://tempuri.org/',
|
||||
'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
|
||||
'com': 'http://schemas.itv.com/2009/05/Common',
|
||||
}
|
||||
for ns, full_ns in ns_map.items():
|
||||
compat_etree_register_namespace(ns, full_ns)
|
||||
|
||||
def _add_ns(name):
|
||||
return xpath_with_ns(name, ns_map)
|
||||
|
||||
def _add_sub_element(element, name):
|
||||
return etree.SubElement(element, _add_ns(name))
|
||||
|
||||
production_id = (
|
||||
params.get('data-video-autoplay-id')
|
||||
or '%s#001' % (
|
||||
params.get('data-video-episode-id')
|
||||
or video_id.replace('a', '/')))
|
||||
|
||||
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
||||
_add_sub_element(req_env, 'soapenv:Header')
|
||||
body = _add_sub_element(req_env, 'soapenv:Body')
|
||||
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
||||
request = _add_sub_element(get_playlist, 'tem:request')
|
||||
_add_sub_element(request, 'itv:ProductionId').text = production_id
|
||||
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
||||
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
||||
_add_sub_element(vodcrid, 'com:Id')
|
||||
_add_sub_element(request, 'itv:Partition')
|
||||
user_info = _add_sub_element(get_playlist, 'tem:userInfo')
|
||||
_add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
|
||||
_add_sub_element(user_info, 'itv:DM')
|
||||
_add_sub_element(user_info, 'itv:RevenueScienceValue')
|
||||
_add_sub_element(user_info, 'itv:SessionId')
|
||||
_add_sub_element(user_info, 'itv:SsoToken')
|
||||
_add_sub_element(user_info, 'itv:UserToken')
|
||||
site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
|
||||
_add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
|
||||
_add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
|
||||
_add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
|
||||
_add_sub_element(site_info, 'itv:Category')
|
||||
_add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
|
||||
_add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
|
||||
device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
|
||||
_add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
|
||||
player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
|
||||
_add_sub_element(player_info, 'itv:Version').text = '2'
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
|
||||
hmac = params['data-video-hmac']
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Content-Type': 'text/xml; charset=utf-8',
|
||||
'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Safari',
|
||||
'model': '5',
|
||||
'os': {
|
||||
'name': 'Windows NT',
|
||||
'version': '6.1',
|
||||
'type': 'desktop'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
},
|
||||
'variantAvailability': {
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes', 'outband-webvtt'],
|
||||
'max': ['hls', 'aes', 'outband-webvtt']
|
||||
},
|
||||
'platformTag': 'dotcom'
|
||||
}
|
||||
}).encode(), headers=headers)
|
||||
video_data = ios_playlist['Playlist']['Video']
|
||||
ios_base_url = video_data.get('Base')
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
||||
def extract_subtitle(sub_url):
|
||||
ext = determine_ext(sub_url, 'ttml')
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': sub_url,
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
resp_env = self._download_xml(
|
||||
params['data-playlist-url'], video_id,
|
||||
headers=headers, data=etree.tostring(req_env), fatal=False)
|
||||
if resp_env:
|
||||
playlist = xpath_element(resp_env, './/Playlist')
|
||||
if playlist is None:
|
||||
fault_code = xpath_text(resp_env, './/faultcode')
|
||||
fault_string = xpath_text(resp_env, './/faultstring')
|
||||
if fault_code == 'InvalidGeoRegion':
|
||||
self.raise_geo_restricted(
|
||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
||||
elif fault_code not in (
|
||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage),
|
||||
'episode_title': params.get('data-video-episode'),
|
||||
'series': params.get('data-video-title'),
|
||||
})
|
||||
for media_file in (video_data.get('MediaFiles') or []):
|
||||
href = media_file.get('Href')
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
||||
info.update({
|
||||
'title': title,
|
||||
'episode_title': title,
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
||||
rtmp_url = media_files.attrib['base']
|
||||
|
||||
for media_file in media_files.findall('MediaFile'):
|
||||
play_path = xpath_text(media_file, 'URL')
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy allows to avoid truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
||||
if caption_url.text:
|
||||
extract_subtitle(caption_url.text)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
||||
hmac = params.get('data-video-hmac')
|
||||
if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url):
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update({
|
||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||
'Content-Type': 'application/json',
|
||||
'hmac': hmac.upper(),
|
||||
})
|
||||
ios_playlist = self._download_json(
|
||||
ios_playlist_url, video_id, data=json.dumps({
|
||||
'user': {
|
||||
'itvUserId': '',
|
||||
'entitlements': [],
|
||||
'token': ''
|
||||
},
|
||||
'device': {
|
||||
'manufacturer': 'Safari',
|
||||
'model': '5',
|
||||
'os': {
|
||||
'name': 'Windows NT',
|
||||
'version': '6.1',
|
||||
'type': 'desktop'
|
||||
}
|
||||
},
|
||||
'client': {
|
||||
'version': '4.1',
|
||||
'id': 'browser'
|
||||
},
|
||||
'variantAvailability': {
|
||||
'featureset': {
|
||||
'min': ['hls', 'aes', 'outband-webvtt'],
|
||||
'max': ['hls', 'aes', 'outband-webvtt']
|
||||
},
|
||||
'platformTag': 'dotcom'
|
||||
}
|
||||
}).encode(), headers=headers, fatal=False)
|
||||
if ios_playlist:
|
||||
video_data = ios_playlist.get('Playlist', {}).get('Video', {})
|
||||
ios_base_url = video_data.get('Base')
|
||||
for media_file in video_data.get('MediaFiles', []):
|
||||
href = media_file.get('Href')
|
||||
if not href:
|
||||
continue
|
||||
if ios_base_url:
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
})
|
||||
subs = video_data.get('Subtitles')
|
||||
if isinstance(subs, list):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if href:
|
||||
extract_subtitle(href)
|
||||
if not info.get('duration'):
|
||||
info['duration'] = parse_duration(video_data.get('Duration'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info.update({
|
||||
subtitles = {}
|
||||
subs = video_data.get('Subtitles') or []
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
href = url_or_none(sub.get('Href'))
|
||||
if not href:
|
||||
continue
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': href,
|
||||
'ext': determine_ext(href, 'vtt'),
|
||||
})
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not info:
|
||||
json_ld = self._parse_json(self._search_regex(
|
||||
JSON_LD_RE, webpage, 'JSON-LD', '{}',
|
||||
group='json_ld'), video_id, fatal=False)
|
||||
if json_ld and json_ld.get('@type') == 'BreadcrumbList':
|
||||
for ile in (json_ld.get('itemListElement:') or []):
|
||||
item = ile.get('item:') or {}
|
||||
if item.get('@type') == 'TVEpisode':
|
||||
item['@context'] = 'http://schema.org'
|
||||
info = self._json_ld(item, video_id, fatal=False) or {}
|
||||
break
|
||||
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
webpage_info = self._search_json_ld(webpage, video_id, default={})
|
||||
if not webpage_info.get('title'):
|
||||
webpage_info['title'] = self._html_search_regex(
|
||||
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or webpage_info['episode']
|
||||
|
||||
return merge_dicts(info, webpage_info)
|
||||
'duration': parse_duration(video_data.get('Duration')),
|
||||
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
|
||||
}, info)
|
||||
|
||||
|
||||
class ITVBTCCIE(InfoExtractor):
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,13 +11,73 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LBRYIE(InfoExtractor):
|
||||
IE_NAME = 'lbry.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])'
|
||||
class LBRYBaseIE(InfoExtractor):
|
||||
_BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
|
||||
_CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
|
||||
_OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
|
||||
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params, resource):
|
||||
return self._download_json(
|
||||
'https://api.lbry.tv/api/v1/proxy',
|
||||
display_id, 'Downloading %s JSON metadata' % resource,
|
||||
headers={'Content-Type': 'application/json-rpc'},
|
||||
data=json.dumps({
|
||||
'method': method,
|
||||
'params': params,
|
||||
}).encode())['result']
|
||||
|
||||
def _resolve_url(self, url, display_id, resource):
|
||||
return self._call_api_proxy(
|
||||
'resolve', display_id, {'urls': url}, resource)[url]
|
||||
|
||||
def _permanent_url(self, url, claim_name, claim_id):
|
||||
return urljoin(url, '/%s:%s' % (claim_name, claim_id))
|
||||
|
||||
def _parse_stream(self, stream, url):
|
||||
stream_value = stream.get('value') or {}
|
||||
stream_type = stream_value.get('stream_type')
|
||||
source = stream_value.get('source') or {}
|
||||
media = stream_value.get(stream_type) or {}
|
||||
signing_channel = stream.get('signing_channel') or {}
|
||||
channel_name = signing_channel.get('name')
|
||||
channel_claim_id = signing_channel.get('claim_id')
|
||||
channel_url = None
|
||||
if channel_name and channel_claim_id:
|
||||
channel_url = self._permanent_url(url, channel_name, channel_claim_id)
|
||||
|
||||
info = {
|
||||
'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'description': stream_value.get('description'),
|
||||
'license': stream_value.get('license'),
|
||||
'timestamp': int_or_none(stream.get('timestamp')),
|
||||
'tags': stream_value.get('tags'),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': try_get(signing_channel, lambda x: x['value']['title']),
|
||||
'channel_id': channel_claim_id,
|
||||
'channel_url': channel_url,
|
||||
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
}
|
||||
if stream_type == 'audio':
|
||||
info['vcodec'] = 'none'
|
||||
else:
|
||||
info.update({
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class LBRYIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
|
||||
_TESTS = [{
|
||||
# Video
|
||||
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||
@@ -28,6 +89,8 @@ class LBRYIE(InfoExtractor):
|
||||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||
'timestamp': 1595694354,
|
||||
'upload_date': '20200725',
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}
|
||||
}, {
|
||||
# Audio
|
||||
@@ -40,6 +103,12 @@ class LBRYIE(InfoExtractor):
|
||||
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||
'timestamp': 1591312601,
|
||||
'upload_date': '20200604',
|
||||
'tags': list,
|
||||
'duration': 2570,
|
||||
'channel': 'The LBRY Foundation',
|
||||
'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'vcodec': 'none',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||
@@ -47,45 +116,99 @@ class LBRYIE(InfoExtractor):
|
||||
}, {
|
||||
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/Episode-1:e7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params):
|
||||
return self._download_json(
|
||||
'https://api.lbry.tv/api/v1/proxy', display_id,
|
||||
headers={'Content-Type': 'application/json-rpc'},
|
||||
data=json.dumps({
|
||||
'method': method,
|
||||
'params': params,
|
||||
}).encode())['result']
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
if display_id.startswith('$/'):
|
||||
display_id = display_id.split('/', 2)[-1].replace('/', ':')
|
||||
else:
|
||||
display_id = display_id.replace(':', '#')
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._resolve_url(uri, display_id, 'stream')
|
||||
result_value = result['value']
|
||||
if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
|
||||
raise ExtractorError('Unsupported URL', expected=True)
|
||||
claim_id = result['claim_id']
|
||||
title = result_value['title']
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
info = self._parse_stream(result, url)
|
||||
info.update({
|
||||
'id': claim_id,
|
||||
'title': title,
|
||||
'url': streaming_url,
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class LBRYChannelIE(LBRYBaseIE):
|
||||
IE_NAME = 'lbry:channel'
|
||||
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
|
||||
_TESTS = [{
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0',
|
||||
'info_dict': {
|
||||
'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
|
||||
'title': 'The LBRY Foundation',
|
||||
'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
|
||||
},
|
||||
'playlist_count': 29,
|
||||
}, {
|
||||
'url': 'https://lbry.tv/@LBRYFoundation',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, claim_id, url, page):
|
||||
page += 1
|
||||
result = self._call_api_proxy(
|
||||
'claim_search', claim_id, {
|
||||
'channel_ids': [claim_id],
|
||||
'claim_type': 'stream',
|
||||
'no_totals': True,
|
||||
'page': page,
|
||||
'page_size': self._PAGE_SIZE,
|
||||
'stream_types': self._SUPPORTED_STREAM_TYPES,
|
||||
}, 'page %d' % page)
|
||||
for item in (result.get('items') or []):
|
||||
stream_claim_name = item.get('name')
|
||||
stream_claim_id = item.get('claim_id')
|
||||
if not (stream_claim_name and stream_claim_id):
|
||||
continue
|
||||
|
||||
info = self._parse_stream(item, url)
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'id': stream_claim_id,
|
||||
'title': try_get(item, lambda x: x['value']['title']),
|
||||
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
|
||||
})
|
||||
yield info
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url).replace(':', '#')
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._call_api_proxy(
|
||||
'resolve', display_id, {'urls': [uri]})[uri]
|
||||
result_value = result['value']
|
||||
if result_value.get('stream_type') not in ('video', 'audio'):
|
||||
raise ExtractorError('Unsupported URL', expected=True)
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', display_id, {'uri': uri})['streaming_url']
|
||||
source = result_value.get('source') or {}
|
||||
media = result_value.get('video') or result_value.get('audio') or {}
|
||||
signing_channel = result_value.get('signing_channel') or {}
|
||||
|
||||
return {
|
||||
'id': result['claim_id'],
|
||||
'title': result_value['title'],
|
||||
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'description': result_value.get('description'),
|
||||
'license': result_value.get('license'),
|
||||
'timestamp': int_or_none(result.get('timestamp')),
|
||||
'tags': result_value.get('tags'),
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': signing_channel.get('name'),
|
||||
'channel_id': signing_channel.get('claim_id'),
|
||||
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'url': streaming_url,
|
||||
}
|
||||
result = self._resolve_url(
|
||||
'lbry://' + display_id, display_id, 'channel')
|
||||
claim_id = result['claim_id']
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, claim_id, url),
|
||||
self._PAGE_SIZE)
|
||||
result_value = result.get('value') or {}
|
||||
return self.playlist_result(
|
||||
entries, claim_id, result_value.get('title'),
|
||||
result_value.get('description'))
|
||||
|
@@ -8,11 +8,15 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
orderedSet,
|
||||
unescapeHTML,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
|
||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
|
||||
'info_dict': {
|
||||
'id': '1498-2',
|
||||
'id': '7971-2',
|
||||
'ext': 'mp4',
|
||||
'title': "Introduction to the Practitioner's Brief",
|
||||
'title': 'What Is Data Science',
|
||||
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
|
||||
'timestamp': 1607387907,
|
||||
'upload_date': '20201208',
|
||||
'duration': 304,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '154',
|
||||
'title': 'AWS Certified Cloud Practitioner',
|
||||
'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
|
||||
'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
|
||||
'duration': 28835,
|
||||
},
|
||||
'playlist_count': 41,
|
||||
'skip': 'Requires Linux Academy account credentials',
|
||||
@@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
|
||||
'client_id': self._CLIENT_ID,
|
||||
'response_type': 'token id_token',
|
||||
'response_mode': 'web_message',
|
||||
'redirect_uri': self._ORIGIN_URL,
|
||||
'scope': 'openid email user_impersonation profile',
|
||||
'audience': self._ORIGIN_URL,
|
||||
@@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
|
||||
access_token = self._search_regex(
|
||||
r'access_token=([^=&]+)', urlh.geturl(),
|
||||
'access token')
|
||||
'access token', default=None)
|
||||
if not access_token:
|
||||
access_token = self._parse_json(
|
||||
self._search_regex(
|
||||
r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
|
||||
'authorization response'), None,
|
||||
transform_source=js_to_json)['response']['access_token']
|
||||
|
||||
self._download_webpage(
|
||||
'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
|
||||
@@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
|
||||
# course path
|
||||
if course_id:
|
||||
entries = [
|
||||
self.url_result(
|
||||
urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
|
||||
for lesson_url in orderedSet(re.findall(
|
||||
r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
|
||||
webpage))]
|
||||
title = unescapeHTML(self._html_search_regex(
|
||||
(r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
|
||||
r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
|
||||
webpage, 'title', default=None, group='value'))
|
||||
description = unescapeHTML(self._html_search_regex(
|
||||
r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, 'description', default=None, group='value'))
|
||||
return self.playlist_result(entries, course_id, title, description)
|
||||
module = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
|
||||
item_id)
|
||||
entries = []
|
||||
chapter_number = None
|
||||
chapter = None
|
||||
chapter_id = None
|
||||
for item in module['items']:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
def type_field(key):
|
||||
return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
|
||||
type_fields = (type_field('name'), type_field('slug'))
|
||||
# Move to next module section
|
||||
if 'section' in type_fields:
|
||||
chapter = item.get('course_name')
|
||||
chapter_id = item.get('course_module')
|
||||
chapter_number = 1 if not chapter_number else chapter_number + 1
|
||||
continue
|
||||
# Skip non-lessons
|
||||
if 'lesson' not in type_fields:
|
||||
continue
|
||||
lesson_url = urljoin(url, item.get('url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
title = item.get('title') or item.get('lesson_name')
|
||||
description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': lesson_url,
|
||||
'ie_key': LinuxAcademyIE.ie_key(),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
|
||||
'duration': parse_duration(item.get('duration')),
|
||||
'chapter': chapter,
|
||||
'chapter_id': chapter_id,
|
||||
'chapter_number': chapter_number,
|
||||
})
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'id': course_id,
|
||||
'title': module.get('title'),
|
||||
'description': module.get('md_desc') or clean_html(module.get('desc')),
|
||||
'duration': parse_duration(module.get('duration')),
|
||||
}
|
||||
|
||||
# single video path
|
||||
info = self._extract_jwplayer_data(
|
||||
webpage, item_id, require_title=False, m3u8_id='hls',)
|
||||
title = self._search_regex(
|
||||
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
||||
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
info.update({
|
||||
m3u8_url = self._parse_json(
|
||||
self._search_regex(
|
||||
r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
|
||||
item_id)[0]['file']
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
info = {
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
})
|
||||
'formats': formats,
|
||||
}
|
||||
lesson = self._parse_json(
|
||||
self._search_regex(
|
||||
(r'window\.lesson\s*=\s*({.+?})\s*;',
|
||||
r'player\.lesson\s*=\s*({.+?})\s*;'),
|
||||
webpage, 'lesson', default='{}'), item_id, fatal=False)
|
||||
if lesson:
|
||||
info.update({
|
||||
'title': lesson.get('lesson_name'),
|
||||
'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
|
||||
'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
|
||||
'duration': parse_duration(lesson.get('duration')),
|
||||
})
|
||||
if not info.get('title'):
|
||||
info['title'] = self._search_regex(
|
||||
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
||||
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
return info
|
||||
|
@@ -2,12 +2,16 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@@ -16,6 +20,8 @@ class MDRIE(InfoExtractor):
|
||||
IE_DESC = 'MDR.DE and KiKA'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
# MDR regularly deletes its videos
|
||||
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||
@@ -66,6 +72,22 @@ class MDRIE(InfoExtractor):
|
||||
'duration': 3239,
|
||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||
},
|
||||
}, {
|
||||
# empty bitrateVideo and bitrateAudio
|
||||
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||
'info_dict': {
|
||||
'id': '128372',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der kleine Wichtel kehrt zurück',
|
||||
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||
'duration': 4876,
|
||||
'timestamp': 1607823300,
|
||||
'upload_date': '20201213',
|
||||
'uploader': 'ZDF',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||
'only_matching': True,
|
||||
@@ -91,10 +113,13 @@ class MDRIE(InfoExtractor):
|
||||
|
||||
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
|
||||
|
||||
type_ = xpath_text(doc, './type', default=None)
|
||||
|
||||
formats = []
|
||||
processed_urls = []
|
||||
for asset in doc.findall('./assets/asset'):
|
||||
for source in (
|
||||
'download',
|
||||
'progressiveDownload',
|
||||
'dynamicHttpStreamingRedirector',
|
||||
'adaptiveHttpStreamingRedirector'):
|
||||
@@ -102,63 +127,49 @@ class MDRIE(InfoExtractor):
|
||||
if url_el is None:
|
||||
continue
|
||||
|
||||
video_url = url_el.text
|
||||
if video_url in processed_urls:
|
||||
video_url = url_or_none(url_el.text)
|
||||
if not video_url or video_url in processed_urls:
|
||||
continue
|
||||
|
||||
processed_urls.append(video_url)
|
||||
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
|
||||
ext = determine_ext(url_el.text)
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
url_formats = self._extract_m3u8_formats(
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
preference=0, m3u8_id='HLS', fatal=False)
|
||||
preference=0, m3u8_id='HLS', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
url_formats = self._extract_f4m_formats(
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||
preference=0, f4m_id='HDS', fatal=False)
|
||||
preference=0, f4m_id='HDS', fatal=False))
|
||||
else:
|
||||
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
|
||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
|
||||
|
||||
format_id = [media_type]
|
||||
if vbr or abr:
|
||||
format_id.append(compat_str(vbr or abr))
|
||||
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%s-%d' % (media_type, vbr or abr),
|
||||
'format_id': '-'.join(format_id),
|
||||
'filesize': filesize,
|
||||
'abr': abr,
|
||||
'preference': 1,
|
||||
'vbr': vbr,
|
||||
}
|
||||
|
||||
if vbr:
|
||||
width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
|
||||
height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
|
||||
f.update({
|
||||
'vbr': vbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
|
||||
'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
|
||||
})
|
||||
|
||||
url_formats = [f]
|
||||
if type_ == 'audio':
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
if not url_formats:
|
||||
continue
|
||||
|
||||
if not vbr:
|
||||
for f in url_formats:
|
||||
abr = f.get('tbr') or abr
|
||||
if 'tbr' in f:
|
||||
del f['tbr']
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
formats.extend(url_formats)
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
https?://
|
||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||
(?:
|
||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9A-Z]{16,})
|
||||
@@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -10,7 +10,7 @@ class NhkVodIE(InfoExtractor):
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
# clip
|
||||
# video clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
||||
'info_dict': {
|
||||
@@ -21,6 +21,19 @@ class NhkVodIE(InfoExtractor):
|
||||
'timestamp': 1565965194,
|
||||
'upload_date': '20190816',
|
||||
},
|
||||
}, {
|
||||
# audio clip
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
|
||||
'info_dict': {
|
||||
'id': 'r_inventions-20201104-1-en',
|
||||
'ext': 'm4a',
|
||||
'title': "Japan's Top Inventions - Miniature Video Cameras",
|
||||
'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||
'only_matching': True,
|
||||
|
@@ -1,6 +1,8 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -12,17 +14,57 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
NO_DEFAULT,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
urljoin,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
_CDN_REPL_REGEX = r'''(?x)://
|
||||
(?:
|
||||
nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
|
||||
nrk-od-no\.telenorcdn\.net|
|
||||
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
|
||||
)/'''
|
||||
|
||||
def _extract_nrk_formats(self, asset_url, video_id):
|
||||
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
|
||||
return self._extract_akamai_formats(
|
||||
re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id)
|
||||
asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
|
||||
formats = self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
|
||||
formats = self._extract_m3u8_formats(
|
||||
re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
|
||||
video_id, 'mp4', 'm3u8_native', fatal=False)
|
||||
return formats
|
||||
|
||||
def _raise_error(self, data):
|
||||
MESSAGES = {
|
||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
||||
'NoProgramRights': 'Ikke tilgjengelig',
|
||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
}
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True:
|
||||
self.raise_geo_restricted(
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
|
||||
|
||||
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
||||
return self._download_json(
|
||||
urljoin('http://psapi.nrk.no/', path),
|
||||
video_id, note or 'Downloading %s JSON' % item,
|
||||
fatal=fatal, query=query)
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
@@ -41,7 +83,7 @@ class NRKIE(NRKBaseIE):
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': '706f34cdf1322577589e369e522b50ef',
|
||||
'md5': 'f46be075326e23ad0e524edfcb06aeb6',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'mp4',
|
||||
@@ -55,7 +97,7 @@ class NRKIE(NRKBaseIE):
|
||||
# MD5 is unstable
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
@@ -78,9 +120,15 @@ class NRKIE(NRKBaseIE):
|
||||
}]
|
||||
|
||||
def _extract_from_playback(self, video_id):
|
||||
manifest = self._download_json(
|
||||
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
|
||||
video_id, 'Downloading manifest JSON')
|
||||
path_templ = 'playback/%s/' + video_id
|
||||
|
||||
def call_playback_api(item, query=None):
|
||||
return self._call_api(path_templ % item, video_id, item, query=query)
|
||||
# known values for preferredCdn: akamai, iponly, minicdn and telenor
|
||||
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
|
||||
|
||||
if manifest.get('playability') == 'nonPlayable':
|
||||
self._raise_error(manifest['nonPlayable'])
|
||||
|
||||
playable = manifest['playable']
|
||||
|
||||
@@ -94,14 +142,10 @@ class NRKIE(NRKBaseIE):
|
||||
if not format_url:
|
||||
continue
|
||||
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = self._download_json(
|
||||
'http://psapi.nrk.no/playback/metadata/%s' % video_id,
|
||||
video_id, 'Downloading metadata JSON')
|
||||
data = call_playback_api('metadata')
|
||||
|
||||
preplay = data['preplay']
|
||||
titles = preplay['titles']
|
||||
@@ -143,29 +187,22 @@ class NRKIE(NRKBaseIE):
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:tv|radio)\.nrk(?:super)?\.no/
|
||||
(?:serie(?:/[^/]+){1,2}|program)/
|
||||
(?![Ee]pisodes)%s
|
||||
(?:/\d{2}-\d{2}-\d{4})?
|
||||
(?:\#del=(?P<part_id>\d+))?
|
||||
''' % _EPISODE_RE
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
||||
'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
|
||||
'info_dict': {
|
||||
'id': 'MDDP12000117AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alarm Trolltunga',
|
||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
'duration': 2223,
|
||||
'duration': 2223.44,
|
||||
'age_limit': 6,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||
'md5': '8d40dab61cea8ab0114e090b029a0565',
|
||||
'info_dict': {
|
||||
'id': 'MUHH48000314AA',
|
||||
'ext': 'mp4',
|
||||
@@ -175,7 +212,6 @@ class NRKTVIE(NRKBaseIE):
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
'skip': 'NoProgramRights',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
@@ -183,7 +219,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
|
||||
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
|
||||
'duration': 4605,
|
||||
'duration': 4605.08,
|
||||
'series': 'Kunnskapskanalen',
|
||||
'episode': '24.05.2014',
|
||||
},
|
||||
@@ -194,51 +230,25 @@ class NRKTVIE(NRKBaseIE):
|
||||
# single playlist video
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515-part2',
|
||||
'ext': 'flv',
|
||||
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
|
||||
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
'skip': 'particular part is not supported currently',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 772,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515BH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'duration': 6175,
|
||||
'series': 'Tour de Ski',
|
||||
'episode': '06.01.2015',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}],
|
||||
'info_dict': {
|
||||
'id': 'MSPO40010515',
|
||||
'id': 'MSPO40010515AH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
|
||||
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
|
||||
'description': 'md5:c03aba1e917561eface5214020551b7a',
|
||||
},
|
||||
'expected_warnings': ['Video is geo restricted'],
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
|
||||
'info_dict': {
|
||||
@@ -269,12 +279,16 @@ class NRKTVIE(NRKBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'ProgramRightsHasExpired',
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_api_host = None
|
||||
@@ -295,6 +309,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
urls = []
|
||||
entries = []
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
@@ -311,19 +326,14 @@ class NRKTVIE(NRKBaseIE):
|
||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||
for num, asset in enumerate(media_assets, 1):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
if not asset_url or asset_url in urls:
|
||||
continue
|
||||
formats = self._extract_akamai_formats(asset_url, video_id)
|
||||
urls.append(asset_url)
|
||||
formats = self._extract_nrk_formats(asset_url, video_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Some f4m streams may not work with hdcore in fragments' URLs
|
||||
for f in formats:
|
||||
extra_param = f.get('extra_param_to_segment_url')
|
||||
if extra_param and 'hdcore' in extra_param:
|
||||
del f['extra_param_to_segment_url']
|
||||
|
||||
entry_id, entry_title = video_id_and_title(num)
|
||||
duration = parse_duration(asset.get('duration'))
|
||||
subtitles = {}
|
||||
@@ -339,38 +349,26 @@ class NRKTVIE(NRKBaseIE):
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'is_live': live,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
media_url = data.get('mediaUrl')
|
||||
if media_url:
|
||||
formats = self._extract_akamai_formats(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': make_title(title),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}]
|
||||
if media_url and media_url not in urls:
|
||||
formats = self._extract_nrk_formats(media_url, video_id)
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': make_title(title),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'is_live': live,
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
MESSAGES = {
|
||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
||||
'NoProgramRights': 'Ikke tilgjengelig',
|
||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
}
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type:
|
||||
self.raise_geo_restricted(
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
||||
message_type, message_type)),
|
||||
expected=True)
|
||||
self._raise_error(data)
|
||||
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
@@ -512,57 +510,98 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
return info
|
||||
|
||||
|
||||
class NRKTVSerieBaseIE(InfoExtractor):
|
||||
def _extract_series(self, webpage, display_id, fatal=True):
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
|
||||
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
|
||||
display_id, fatal=False, transform_source=js_to_json)
|
||||
if not config:
|
||||
return
|
||||
return try_get(
|
||||
config,
|
||||
(lambda x: x['initialState']['series'], lambda x: x['series']),
|
||||
dict)
|
||||
|
||||
def _extract_seasons(self, seasons):
|
||||
if not isinstance(seasons, list):
|
||||
return []
|
||||
entries = []
|
||||
for season in seasons:
|
||||
entries.extend(self._extract_episodes(season))
|
||||
return entries
|
||||
|
||||
def _extract_episodes(self, season):
|
||||
if not isinstance(season, dict):
|
||||
return []
|
||||
return self._extract_entries(season.get('episodes'))
|
||||
|
||||
class NRKTVSerieBaseIE(NRKBaseIE):
|
||||
def _extract_entries(self, entry_list):
|
||||
if not isinstance(entry_list, list):
|
||||
return []
|
||||
entries = []
|
||||
for episode in entry_list:
|
||||
nrk_id = episode.get('prfId')
|
||||
nrk_id = episode.get('prfId') or episode.get('episodeId')
|
||||
if not nrk_id or not isinstance(nrk_id, compat_str):
|
||||
continue
|
||||
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
|
||||
return entries
|
||||
|
||||
_ASSETS_KEYS = ('episodes', 'instalments',)
|
||||
|
||||
def _extract_assets_key(self, embedded):
|
||||
for asset_key in self._ASSETS_KEYS:
|
||||
if embedded.get(asset_key):
|
||||
return asset_key
|
||||
|
||||
def _entries(self, data, display_id):
|
||||
for page_num in itertools.count(1):
|
||||
embedded = data.get('_embedded') or data
|
||||
if not isinstance(embedded, dict):
|
||||
break
|
||||
assets_key = self._extract_assets_key(embedded)
|
||||
if not assets_key:
|
||||
break
|
||||
# Extract entries
|
||||
entries = try_get(
|
||||
embedded,
|
||||
(lambda x: x[assets_key]['_embedded'][assets_key],
|
||||
lambda x: x[assets_key]),
|
||||
list)
|
||||
for e in self._extract_entries(entries):
|
||||
yield e
|
||||
# Find next URL
|
||||
next_url_path = try_get(
|
||||
data,
|
||||
(lambda x: x['_links']['next']['href'],
|
||||
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
|
||||
compat_str)
|
||||
if not next_url_path:
|
||||
break
|
||||
data = self._call_api(
|
||||
next_url_path, display_id,
|
||||
note='Downloading %s JSON page %d' % (assets_key, page_num),
|
||||
fatal=False)
|
||||
if not data:
|
||||
break
|
||||
|
||||
|
||||
class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
|
||||
'info_dict': {
|
||||
'id': '1',
|
||||
'id': 'backstage/1',
|
||||
'title': 'Sesong 1',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
}
|
||||
}, {
|
||||
# no /sesong/ in path
|
||||
'url': 'https://tv.nrk.no/serie/lindmo/2016',
|
||||
'info_dict': {
|
||||
'id': 'lindmo/2016',
|
||||
'title': '2016',
|
||||
},
|
||||
'playlist_mincount': 29,
|
||||
}, {
|
||||
# weird nested _embedded in catalog JSON response
|
||||
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
|
||||
'info_dict': {
|
||||
'id': 'dickie-dick-dickens/1',
|
||||
'title': 'Sesong 1',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
# 841 entries, multi page
|
||||
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
|
||||
'info_dict': {
|
||||
'id': 'dagsnytt/201509',
|
||||
'title': 'September 2015',
|
||||
},
|
||||
'playlist_mincount': 841,
|
||||
}, {
|
||||
# 180 entries, single page
|
||||
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
@@ -570,25 +609,35 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
|
||||
else super(NRKTVSeasonIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
domain, serie, season_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = '%s/%s' % (serie, season_id)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data = self._call_api(
|
||||
'%s/catalog/series/%s/seasons/%s' % (domain, serie, season_id),
|
||||
display_id, 'season', query={'pageSize': 50})
|
||||
|
||||
series = self._extract_series(webpage, display_id)
|
||||
|
||||
season = next(
|
||||
s for s in series['seasons']
|
||||
if int(display_id) == s.get('seasonNumber'))
|
||||
|
||||
title = try_get(season, lambda x: x['titles']['title'], compat_str)
|
||||
title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
|
||||
return self.playlist_result(
|
||||
self._extract_episodes(season), display_id, title)
|
||||
self._entries(data, display_id),
|
||||
display_id, title)
|
||||
|
||||
|
||||
class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/serie/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
# new layout, instalments
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 90,
|
||||
}, {
|
||||
# new layout, instalments, more entries
|
||||
'url': 'https://tv.nrk.no/serie/lindmo',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/blank',
|
||||
'info_dict': {
|
||||
'id': 'blank',
|
||||
@@ -602,25 +651,16 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'backstage',
|
||||
'title': 'Backstage',
|
||||
'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
|
||||
'description': 'md5:63692ceb96813d9a207e9910483d948b',
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
# new layout, instalments
|
||||
'url': 'https://tv.nrk.no/serie/groenn-glede',
|
||||
'info_dict': {
|
||||
'id': 'groenn-glede',
|
||||
'title': 'Grønn glede',
|
||||
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
# old layout
|
||||
'url': 'https://tv.nrksuper.no/serie/labyrint',
|
||||
'info_dict': {
|
||||
'id': 'labyrint',
|
||||
'title': 'Labyrint',
|
||||
'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
|
||||
'description': 'I Daidalos sin undersjøiske Labyrint venter spennende oppgaver, skumle robotskapninger og slim.',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
@@ -632,6 +672,17 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/postmann-pat',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
|
||||
'info_dict': {
|
||||
'id': 'dickie-dick-dickens',
|
||||
'title': 'Dickie Dick Dickens',
|
||||
'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
|
||||
},
|
||||
'playlist_mincount': 8,
|
||||
}, {
|
||||
'url': 'https://nrksuper.no/serie/labyrint',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -642,43 +693,42 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
else super(NRKTVSeriesIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
site, series_id = re.match(self._VALID_URL, url).groups()
|
||||
is_radio = site == 'radio.nrk'
|
||||
domain = 'radio' if is_radio else 'tv'
|
||||
|
||||
webpage = self._download_webpage(url, series_id)
|
||||
size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
|
||||
series = self._call_api(
|
||||
'%s/catalog/series/%s' % (domain, series_id),
|
||||
series_id, 'serie', query={size_prefix + 'ageSize': 50})
|
||||
titles = try_get(series, [
|
||||
lambda x: x['titles'],
|
||||
lambda x: x[x['type']]['titles'],
|
||||
lambda x: x[x['seriesType']]['titles'],
|
||||
]) or {}
|
||||
|
||||
# New layout (e.g. https://tv.nrk.no/serie/backstage)
|
||||
series = self._extract_series(webpage, series_id, fatal=False)
|
||||
if series:
|
||||
title = try_get(series, lambda x: x['titles']['title'], compat_str)
|
||||
description = try_get(
|
||||
series, lambda x: x['titles']['subtitle'], compat_str)
|
||||
entries = []
|
||||
entries.extend(self._extract_seasons(series.get('seasons')))
|
||||
entries.extend(self._extract_entries(series.get('instalments')))
|
||||
entries.extend(self._extract_episodes(series.get('extraMaterial')))
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
entries = []
|
||||
entries.extend(self._entries(series, series_id))
|
||||
embedded = series.get('_embedded') or {}
|
||||
linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
|
||||
embedded_seasons = embedded.get('seasons') or []
|
||||
if len(linked_seasons) > len(embedded_seasons):
|
||||
for season in linked_seasons:
|
||||
season_name = season.get('name')
|
||||
if season_name and isinstance(season_name, compat_str):
|
||||
entries.append(self.url_result(
|
||||
'https://%s.nrk.no/serie/%s/sesong/%s'
|
||||
% (domain, series_id, season_name),
|
||||
ie=NRKTVSeasonIE.ie_key(),
|
||||
video_title=season.get('title')))
|
||||
else:
|
||||
for season in embedded_seasons:
|
||||
entries.extend(self._entries(season, series_id))
|
||||
entries.extend(self._entries(
|
||||
embedded.get('extraMaterial') or {}, series_id))
|
||||
|
||||
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
|
||||
entries = [
|
||||
self.url_result(
|
||||
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
|
||||
series=series_id, season=season_id))
|
||||
for season_id in re.findall(self._ITEM_RE, webpage)
|
||||
]
|
||||
|
||||
title = self._html_search_meta(
|
||||
'seriestitle', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, fatal=False)
|
||||
if title:
|
||||
title = self._search_regex(
|
||||
r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'series_description', webpage,
|
||||
'description', default=None) or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(entries, series_id, title, description)
|
||||
return self.playlist_result(
|
||||
entries, series_id, titles.get('title'), titles.get('subtitle'))
|
||||
|
||||
|
||||
class NRKTVDirekteIE(NRKTVIE):
|
||||
@@ -782,14 +832,8 @@ class NRKSkoleIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id,
|
||||
video_id)
|
||||
|
||||
nrk_id = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
|
||||
webpage, 'application json'),
|
||||
video_id)['activeMedia']['psId']
|
||||
nrk_id = self._download_json(
|
||||
'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id,
|
||||
video_id)['psId']
|
||||
|
||||
return self.url_result('nrk:%s' % nrk_id)
|
||||
|
@@ -541,6 +541,10 @@ class PeerTubeIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'filesize': file_size,
|
||||
})
|
||||
if format_id == '0p':
|
||||
f['vcodec'] = 'none'
|
||||
else:
|
||||
f['fps'] = int_or_none(file_.get('fps'))
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -31,7 +31,12 @@ class PornHubBaseIE(InfoExtractor):
|
||||
def dl(*args, **kwargs):
|
||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
ret = dl(*args, **kwargs)
|
||||
|
||||
if not ret:
|
||||
return ret
|
||||
|
||||
webpage, urlh = ret
|
||||
|
||||
if any(re.search(p, webpage) for p in (
|
||||
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||
@@ -53,7 +58,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
@@ -152,6 +157,9 @@ class PornHubIE(PornHubBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||
'only_matching': True,
|
||||
@@ -160,7 +168,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
webpage)
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
@@ -346,9 +354,9 @@ class PornHubIE(PornHubBaseIE):
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||
r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||
r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
@@ -422,7 +430,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
|
||||
|
||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||
'playlist_mincount': 118,
|
||||
@@ -490,7 +498,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||
'only_matching': True,
|
||||
@@ -605,7 +613,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'info_dict': {
|
||||
|
@@ -6,14 +6,24 @@ from ..compat import compat_urllib_parse_urlparse
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
xpath_attr,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class RuutuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/|
|
||||
static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid=
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ruutu.fi/video/2058907',
|
||||
@@ -71,15 +81,53 @@ class RuutuIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 502: Bad Gateway'],
|
||||
}
|
||||
'expected_warnings': [
|
||||
'HTTP Error 502: Bad Gateway',
|
||||
'Failed to download m3u8 information',
|
||||
],
|
||||
},
|
||||
{
|
||||
'url': 'http://www.supla.fi/audio/2231370',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# episode
|
||||
'url': 'https://www.ruutu.fi/video/3401964',
|
||||
'info_dict': {
|
||||
'id': '3401964',
|
||||
'ext': 'mp4',
|
||||
'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
|
||||
'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 2582,
|
||||
'age_limit': 12,
|
||||
'upload_date': '20190508',
|
||||
'series': 'Temptation Island Suomi',
|
||||
'season_number': 5,
|
||||
'episode_number': 17,
|
||||
'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# premium
|
||||
'url': 'https://www.ruutu.fi/video/3618715',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_API_BASE = 'https://gatling.nelonenmedia.fi'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
|
||||
'%s/media-xml-cache' % self._API_BASE, video_id,
|
||||
query={'id': video_id})
|
||||
|
||||
formats = []
|
||||
@@ -96,9 +144,18 @@ class RuutuIE(InfoExtractor):
|
||||
continue
|
||||
processed_urls.append(video_url)
|
||||
ext = determine_ext(video_url)
|
||||
auth_video_url = url_or_none(self._download_webpage(
|
||||
'%s/auth/access/v2' % self._API_BASE, video_id,
|
||||
note='Downloading authenticated %s stream URL' % ext,
|
||||
fatal=False, query={'stream': video_url}))
|
||||
if auth_video_url:
|
||||
processed_urls.append(auth_video_url)
|
||||
video_url = auth_video_url
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
video_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id='hds', fatal=False))
|
||||
@@ -136,18 +193,35 @@ class RuutuIE(InfoExtractor):
|
||||
|
||||
extract_formats(video_xml.find('./Clip'))
|
||||
|
||||
drm = xpath_text(video_xml, './Clip/DRM', default=None)
|
||||
if not formats and drm:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
def pv(name):
|
||||
node = find_xpath_attr(
|
||||
video_xml, './Clip/PassthroughVariables/variable', 'name', name)
|
||||
if node is not None:
|
||||
return node.get('value')
|
||||
|
||||
if not formats:
|
||||
drm = xpath_text(video_xml, './Clip/DRM', default=None)
|
||||
if drm:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
ns_st_cds = pv('ns_st_cds')
|
||||
if ns_st_cds != 'free':
|
||||
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
themes = pv('themes')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True),
|
||||
'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'),
|
||||
'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'),
|
||||
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')),
|
||||
'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')),
|
||||
'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')),
|
||||
'upload_date': unified_strdate(pv('date_start')),
|
||||
'series': pv('series_name'),
|
||||
'season_number': int_or_none(pv('season_number')),
|
||||
'episode_number': int_or_none(pv('episode_number')),
|
||||
'categories': themes.split(',') if themes else [],
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -2,7 +2,12 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SlidesLiveIE(InfoExtractor):
|
||||
@@ -18,8 +23,21 @@ class SlidesLiveIE(InfoExtractor):
|
||||
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
|
||||
'uploader': 'SlidesLive Videos - A',
|
||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||
'timestamp': 1597615266,
|
||||
'upload_date': '20170925',
|
||||
}
|
||||
}, {
|
||||
# video_service_name = yoda
|
||||
'url': 'https://slideslive.com/38935785',
|
||||
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
|
||||
'info_dict': {
|
||||
'id': 'RMraDYN5ozA_',
|
||||
'ext': 'mp4',
|
||||
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
# video_service_name = youtube
|
||||
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
|
||||
@@ -39,18 +57,48 @@ class SlidesLiveIE(InfoExtractor):
|
||||
video_data = self._download_json(
|
||||
'https://ben.slideslive.com/player/' + video_id, video_id)
|
||||
service_name = video_data['video_service_name'].lower()
|
||||
assert service_name in ('url', 'vimeo', 'youtube')
|
||||
assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
|
||||
service_id = video_data['video_service_id']
|
||||
subtitles = {}
|
||||
for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
webvtt_url = url_or_none(sub.get('webvtt_url'))
|
||||
if not webvtt_url:
|
||||
continue
|
||||
lang = sub.get('language') or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': webvtt_url,
|
||||
})
|
||||
info = {
|
||||
'id': video_id,
|
||||
'thumbnail': video_data.get('thumbnail'),
|
||||
'url': service_id,
|
||||
'is_live': bool_or_none(video_data.get('is_live')),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
if service_name == 'url':
|
||||
if service_name in ('url', 'yoda'):
|
||||
info['title'] = video_data['title']
|
||||
if service_name == 'url':
|
||||
info['url'] = service_id
|
||||
else:
|
||||
formats = []
|
||||
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
_MANIFEST_PATTERN % (service_id, 'm3u8'),
|
||||
service_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
|
||||
mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
info.update({
|
||||
'id': service_id,
|
||||
'formats': formats,
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': service_id,
|
||||
'ie_key': service_name.capitalize(),
|
||||
'title': video_data.get('title'),
|
||||
})
|
||||
|
@@ -1,416 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import hashlib
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class SmotriIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com'
|
||||
IE_NAME = 'smotri'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
|
||||
_NETRC_MACHINE = 'smotri'
|
||||
|
||||
_TESTS = [
|
||||
# real video id 2610366
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v261036632ab',
|
||||
'md5': '02c0dfab2102984e9c5bb585cc7cc321',
|
||||
'info_dict': {
|
||||
'id': 'v261036632ab',
|
||||
'ext': 'mp4',
|
||||
'title': 'катастрофа с камер видеонаблюдения',
|
||||
'uploader': 'rbc2008',
|
||||
'uploader_id': 'rbc08',
|
||||
'upload_date': '20131118',
|
||||
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
|
||||
},
|
||||
},
|
||||
# real video id 57591
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v57591cb20',
|
||||
'md5': '830266dfc21f077eac5afd1883091bcd',
|
||||
'info_dict': {
|
||||
'id': 'v57591cb20',
|
||||
'ext': 'flv',
|
||||
'title': 'test',
|
||||
'uploader': 'Support Photofile@photofile',
|
||||
'uploader_id': 'support-photofile',
|
||||
'upload_date': '20070704',
|
||||
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
|
||||
},
|
||||
},
|
||||
# video-password, not approved by moderator
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
|
||||
'md5': 'f6331cef33cad65a0815ee482a54440b',
|
||||
'info_dict': {
|
||||
'id': 'v1390466a13c',
|
||||
'ext': 'mp4',
|
||||
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
|
||||
'uploader': 'timoxa40',
|
||||
'uploader_id': 'timoxa40',
|
||||
'upload_date': '20100404',
|
||||
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': 'qwerty',
|
||||
},
|
||||
'skip': 'Video is not approved by moderator',
|
||||
},
|
||||
# video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v6984858774#',
|
||||
'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
|
||||
'info_dict': {
|
||||
'id': 'v6984858774',
|
||||
'ext': 'mp4',
|
||||
'title': 'Дача Солженицина ПАРОЛЬ 223322',
|
||||
'uploader': 'psavari1',
|
||||
'uploader_id': 'psavari1',
|
||||
'upload_date': '20081103',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '223322',
|
||||
},
|
||||
},
|
||||
# age limit + video-password, not approved by moderator
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
|
||||
'md5': '91e909c9f0521adf5ee86fbe073aad70',
|
||||
'info_dict': {
|
||||
'id': 'v15408898bcf',
|
||||
'ext': 'flv',
|
||||
'title': 'этот ролик не покажут по ТВ',
|
||||
'uploader': 'zzxxx',
|
||||
'uploader_id': 'ueggb',
|
||||
'upload_date': '20101001',
|
||||
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '333'
|
||||
},
|
||||
'skip': 'Video is not approved by moderator',
|
||||
},
|
||||
# age limit + video-password
|
||||
{
|
||||
'url': 'http://smotri.com/video/view/?id=v7780025814',
|
||||
'md5': 'b4599b068422559374a59300c5337d72',
|
||||
'info_dict': {
|
||||
'id': 'v7780025814',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sexy Beach (пароль 123)',
|
||||
'uploader': 'вАся',
|
||||
'uploader_id': 'asya_prosto',
|
||||
'upload_date': '20081218',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '123'
|
||||
},
|
||||
},
|
||||
# swf player
|
||||
{
|
||||
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
|
||||
'md5': '31099eeb4bc906712c5f40092045108d',
|
||||
'info_dict': {
|
||||
'id': 'v9188090500',
|
||||
'ext': 'mp4',
|
||||
'title': 'Shakira - Don\'t Bother',
|
||||
'uploader': 'HannahL',
|
||||
'uploader_id': 'lisaha95',
|
||||
'upload_date': '20090331',
|
||||
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _extract_url(cls, webpage):
|
||||
mobj = re.search(
|
||||
r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return mobj.group('url')
|
||||
|
||||
mobj = re.search(
|
||||
r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
|
||||
<div\s+class="video_image">[^<]+</div>\s*
|
||||
<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
|
||||
if mobj is not None:
|
||||
return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
|
||||
|
||||
def _search_meta(self, name, html, display_name=None):
|
||||
if display_name is None:
|
||||
display_name = name
|
||||
return self._html_search_meta(name, html, display_name)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_form = {
|
||||
'ticket': video_id,
|
||||
'video_url': '1',
|
||||
'frame_url': '1',
|
||||
'devid': 'LoadupFlashPlayer',
|
||||
'getvideoinfo': '1',
|
||||
}
|
||||
|
||||
video_password = self._downloader.params.get('videopassword')
|
||||
if video_password:
|
||||
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
|
||||
|
||||
video = self._download_json(
|
||||
'http://smotri.com/video/view/url/bot/',
|
||||
video_id, 'Downloading video JSON',
|
||||
data=urlencode_postdata(video_form),
|
||||
headers={'Content-Type': 'application/x-www-form-urlencoded'})
|
||||
|
||||
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
|
||||
|
||||
if not video_url:
|
||||
if video.get('_moderate_no'):
|
||||
raise ExtractorError(
|
||||
'Video %s has not been approved by moderator' % video_id, expected=True)
|
||||
|
||||
if video.get('error'):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
if video.get('_pass_protected') == 1:
|
||||
msg = ('Invalid video password' if video_password
|
||||
else 'This video is protected by a password, use the --video-password option')
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
title = video['title']
|
||||
thumbnail = video.get('_imgURL')
|
||||
upload_date = unified_strdate(video.get('added'))
|
||||
uploader = video.get('userNick')
|
||||
uploader_id = video.get('userLogin')
|
||||
duration = int_or_none(video.get('duration'))
|
||||
|
||||
# Video JSON does not provide enough meta data
|
||||
# We will extract some from the video web page instead
|
||||
webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
|
||||
|
||||
# Warning if video is unavailable
|
||||
warning = self._html_search_regex(
|
||||
r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
|
||||
'warning message', default=None)
|
||||
if warning is not None:
|
||||
self._downloader.report_warning(
|
||||
'Video %s may not be available; smotri said: %s ' %
|
||||
(video_id, warning))
|
||||
|
||||
# Adult content
|
||||
if 'EroConfirmText">' in webpage:
|
||||
self.report_age_confirmation()
|
||||
confirm_string = self._html_search_regex(
|
||||
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
|
||||
webpage, 'confirm string')
|
||||
confirm_url = webpage_url + '&confirm=%s' % confirm_string
|
||||
webpage = self._download_webpage(
|
||||
confirm_url, video_id,
|
||||
'Downloading video page (age confirmed)')
|
||||
adult_content = True
|
||||
else:
|
||||
adult_content = False
|
||||
|
||||
view_count = self._html_search_regex(
|
||||
r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': int_or_none(view_count),
|
||||
'age_limit': 18 if adult_content else 0,
|
||||
}
|
||||
|
||||
|
||||
class SmotriCommunityIE(InfoExtractor):
|
||||
IE_DESC = 'Smotri.com community videos'
|
||||
IE_NAME = 'smotri:community'
|
||||
_VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
|
||||
_TEST = {
|
||||
'url': 'http://smotri.com/community/video/kommuna',
|
||||
'info_dict': {
|
||||
'id': 'kommuna',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
community_id = self._match_id(url)
|
||||
|
||||
rss = self._download_xml(
|
||||
'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
|
||||
community_id, 'Downloading community RSS')
|
||||
|
||||
entries = [
|
||||
self.url_result(video_url.text, SmotriIE.ie_key())
|
||||
for video_url in rss.findall('./channel/item/link')]
|
||||
|
||||
return self.playlist_result(entries, community_id)
|
||||
|
||||
|
||||
class SmotriUserIE(InfoExtractor):
    """Playlist extractor for all videos of a Smotri.com user.

    Entries come from the user's RSS export; the playlist title is the
    nickname parsed out of the feed description when it is present.
    """

    IE_DESC = 'Smotri.com user videos'
    IE_NAME = 'smotri:user'
    _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
    _TESTS = [{
        'url': 'http://smotri.com/user/inspector',
        'info_dict': {
            'id': 'inspector',
            'title': 'Inspector',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Build a playlist (id = user id, title = nickname) from the user RSS feed."""
        playlist_id = self._match_id(url)

        feed_url = 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % playlist_id
        feed = self._download_xml(feed_url, playlist_id, 'Downloading user RSS')

        videos = []
        for link in feed.findall('./channel/item/link'):
            videos.append(self.url_result(link.text, 'Smotri'))

        # A missing description becomes '' so the regex search below still
        # receives a string; the search itself is non-fatal.
        feed_description = xpath_text(feed, './channel/description') or ''
        nickname = self._search_regex(
            '^Видео режиссера (.+)$', feed_description,
            'user nickname', fatal=False)

        return self.playlist_result(videos, playlist_id, nickname)
|
||||
|
||||
|
||||
class SmotriBroadcastIE(InfoExtractor):
    """Extractor for live RTMP broadcasts on Smotri.com.

    Adult broadcasts require a login (credentials are taken from
    ``_get_login_info``); password-protected broadcasts require the
    ``videopassword`` downloader parameter.
    """

    IE_DESC = 'Smotri.com broadcasts'
    IE_NAME = 'smotri:broadcast'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
    _NETRC_MACHINE = 'smotri'

    def _real_extract(self, url):
        """Return the info dict of a live broadcast.

        Raises ExtractorError when the broadcast does not exist, the login
        fails, the broadcast is password protected and no/bad password was
        given, the broadcast is offline, or the server reply is not the
        expected JSON.
        """
        mobj = re.match(self._VALID_URL, url)
        broadcast_id = mobj.group('id')

        broadcast_url = 'http://' + mobj.group('url')
        broadcast_page = self._download_webpage(
            broadcast_url, broadcast_id, 'Downloading broadcast page')

        # The id comes straight from the URL and may contain regex
        # metacharacters, so it has to be escaped before interpolation.
        not_found_re = (
            '>Режиссер с логином <br/>"%s"<br/> <span>не существует<'
            % re.escape(broadcast_id))
        if re.search(not_found_re, broadcast_page) is not None:
            raise ExtractorError(
                'Broadcast %s does not exist' % broadcast_id, expected=True)

        # Adult content
        if 'EroConfirmText">' in broadcast_page:
            (username, password) = self._get_login_info()
            if username is None:
                self.raise_login_required(
                    'Erotic broadcasts allowed only for registered users')

            login_form = {
                'login-hint53': '1',
                'confirm_erotic': '1',
                'login': username,
                'password': password,
            }

            request = sanitized_Request(
                broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form))
            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
            broadcast_page = self._download_webpage(
                request, broadcast_id, 'Logging in and confirming age')

            if '>Неверный логин или пароль<' in broadcast_page:
                raise ExtractorError(
                    'Unable to log in: bad username or password', expected=True)

            adult_content = True
        else:
            adult_content = False

        ticket = self._html_search_regex(
            (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
             r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
            broadcast_page, 'broadcast ticket', group='ticket')

        broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket

        broadcast_password = self._downloader.params.get('videopassword')
        if broadcast_password:
            # The password is sent as the hex MD5 digest of its UTF-8 bytes.
            broadcast_url += '&pass=%s' % hashlib.md5(
                broadcast_password.encode('utf-8')).hexdigest()

        broadcast_json_page = self._download_webpage(
            broadcast_url, broadcast_id, 'Downloading broadcast JSON')

        try:
            broadcast_json = json.loads(broadcast_json_page)
        except ValueError:
            # A reply that is not JSON at all is reported like any other
            # malformed reply instead of escaping as a raw ValueError.
            raise ExtractorError('Unexpected broadcast JSON')

        # Must be bound before the try block: the KeyError handler reads it,
        # and the very first lookup below may be the one that fails.
        protected_broadcast = False
        try:
            protected_broadcast = broadcast_json['_pass_protected'] == 1
            if protected_broadcast and not broadcast_password:
                raise ExtractorError(
                    'This broadcast is protected by a password, use the --video-password option',
                    expected=True)

            broadcast_offline = broadcast_json['is_play'] == 0
            if broadcast_offline:
                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)

            rtmp_url = broadcast_json['_server']
            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
            if not mobj:
                raise ExtractorError('Unexpected broadcast rtmp URL')

            broadcast_playpath = broadcast_json['_streamName']
            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
            broadcast_thumbnail = broadcast_json.get('_imgURL')
            broadcast_title = self._live_title(broadcast_json['title'])
            broadcast_description = broadcast_json.get('description')
            broadcaster_nick = broadcast_json.get('nick')
            broadcaster_login = broadcast_json.get('login')
            rtmp_conn = 'S:%s' % uuid.uuid4().hex
        except KeyError:
            # Missing stream keys on a protected broadcast are treated as a
            # rejected password; otherwise the reply is simply malformed.
            if protected_broadcast:
                raise ExtractorError('Bad broadcast password', expected=True)
            raise ExtractorError('Unexpected broadcast JSON')

        return {
            'id': broadcast_id,
            'url': rtmp_url,
            'title': broadcast_title,
            'thumbnail': broadcast_thumbnail,
            'description': broadcast_description,
            'uploader': broadcaster_nick,
            'uploader_id': broadcaster_login,
            'age_limit': 18 if adult_content else 0,
            'ext': 'flv',
            'play_path': broadcast_playpath,
            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
            'app': broadcast_app,
            'rtmp_live': True,
            'rtmp_conn': rtmp_conn,
            'is_live': True,
        }
|
176
youtube_dl/extractor/spreaker.py
Normal file
176
youtube_dl/extractor/spreaker.py
Normal file
@@ -0,0 +1,176 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
def _extract_episode(data, episode_id=None):
    """Convert a Spreaker episode JSON object into an info dict.

    ``data`` must contain ``title`` and ``download_url`` (a KeyError is
    raised otherwise).  When ``episode_id`` is not given, the id is taken
    from ``data['episode_id']``.
    """
    title = data['title']
    download_url = data['download_url']

    show_title = try_get(data, lambda x: x['show']['title'], compat_str)
    author_name = try_get(data, lambda x: x['author']['fullname'], compat_str)

    # Collect every image variant that carries a usable URL.
    image_urls = (
        url_or_none(data.get('%s_url' % variant))
        for variant in ('image_original', 'image_medium', 'image'))
    thumbnails = [{'url': image_url} for image_url in image_urls if image_url]

    def count_of(kind):
        # The counter is looked up as '<kind>s_count' at the top level first
        # and then as '<kind>s' under 'stats'.
        getters = (
            lambda x: x['%ss_count' % kind],
            lambda x: x['stats']['%ss' % kind],
        )
        return int_or_none(try_get(data, getters))

    def scaled(field):
        # The raw value is divided by 1000 (scale=1000).
        return float_or_none(data.get(field), scale=1000)

    return {
        'id': compat_str(episode_id or data['episode_id']),
        'url': download_url,
        'display_id': data.get('permalink'),
        'title': title,
        'description': data.get('description'),
        'timestamp': unified_timestamp(data.get('published_at')),
        'uploader': author_name,
        'uploader_id': str_or_none(data.get('author_id')),
        'creator': author_name,
        'duration': scaled('duration') or scaled('length'),
        'view_count': count_of('play'),
        'like_count': count_of('like'),
        'comment_count': count_of('message'),
        'format': 'MPEG Layer 3',
        'format_id': 'mp3',
        'container': 'mp3',
        'ext': 'mp3',
        'thumbnails': thumbnails,
        'series': show_title,
        'extractor_key': SpreakerIE.ie_key(),
    }
|
||||
|
||||
|
||||
class SpreakerIE(InfoExtractor):
    """Extractor for single Spreaker episodes addressed through api.spreaker.com."""

    _VALID_URL = r'''(?x)
                    https?://
                        api\.spreaker\.com/
                        (?:
                            (?:download/)?episode|
                            v2/episodes
                        )/
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://api.spreaker.com/episode/12534508',
        'info_dict': {
            'id': '12534508',
            'display_id': 'swm-ep15-how-to-market-your-music-part-2',
            'ext': 'mp3',
            'title': 'EP:15 | Music Marketing (Likes) - Part 2',
            'description': 'md5:0588c43e27be46423e183076fa071177',
            'timestamp': 1502250336,
            'upload_date': '20170809',
            'uploader': 'SWM',
            'uploader_id': '9780658',
            'duration': 1063.42,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'series': 'Success With Music (SWM)',
        },
    }, {
        'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
        'only_matching': True,
    }, {
        'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the episode JSON from the v2 API and convert it to an info dict."""
        episode_id = self._match_id(url)
        api_url = 'https://api.spreaker.com/v2/episodes/%s' % episode_id
        reply = self._download_json(api_url, episode_id)
        episode = reply['response']['episode']
        return _extract_episode(episode, episode_id)
|
||||
|
||||
|
||||
class SpreakerPageIE(InfoExtractor):
    """Extractor for episode pages on spreaker.com; delegates to ``SpreakerIE``."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric episode id in the page and return a url_result for the API URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The id is looked for in a data attribute first, then in inline JS.
        id_patterns = (
            r'data-episode_id=["\'](?P<id>\d+)',
            r'episode_id\s*:\s*(?P<id>\d+)',
        )
        episode_id = self._search_regex(id_patterns, webpage, 'episode id')

        api_url = 'https://api.spreaker.com/episode/%s' % episode_id
        return self.url_result(
            api_url, ie=SpreakerIE.ie_key(), video_id=episode_id)
|
||||
|
||||
|
||||
class SpreakerShowIE(InfoExtractor):
    """Playlist extractor for Spreaker shows addressed through api.spreaker.com."""

    _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://api.spreaker.com/show/4652058',
        'info_dict': {
            'id': '4652058',
        },
        'playlist_mincount': 118,
    }]

    def _entries(self, show_id):
        """Yield an info dict for every episode of the show, page by page.

        Paging stops at the first page without a pager, without a non-empty
        list of results, or whose number equals the pager's ``last_page``.
        """
        page_num = 0
        while True:
            page_num += 1
            page = self._download_json(
                'https://api.spreaker.com/show/%s/episodes' % show_id,
                show_id, note='Downloading JSON page %d' % page_num, query={
                    'page': page_num,
                    'max_per_page': 100,
                })

            pager = try_get(page, lambda x: x['response']['pager'], dict)
            if not pager:
                return

            results = pager.get('results')
            if not (results and isinstance(results, list)):
                return

            for item in results:
                # Anything that is not a JSON object is skipped.
                if isinstance(item, dict):
                    yield _extract_episode(item)

            if page_num == pager.get('last_page'):
                return

    def _real_extract(self, url):
        """Return a playlist whose entries are produced lazily by ``_entries``."""
        show_id = self._match_id(url)
        episodes = self._entries(show_id)
        return self.playlist_result(episodes, playlist_id=show_id)
|
||||
|
||||
|
||||
class SpreakerShowPageIE(InfoExtractor):
    """Extractor for show pages on spreaker.com; delegates to ``SpreakerShowIE``."""

    _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.spreaker.com/show/success-with-music',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the numeric show id in the page and return a url_result for the API URL."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        show_id = self._search_regex(
            r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')

        api_url = 'https://api.spreaker.com/show/%s' % show_id
        return self.url_result(
            api_url, ie=SpreakerShowIE.ie_key(), video_id=show_id)
|
@@ -269,7 +269,7 @@ class TeachableCourseIE(TeachableBaseIE):
|
||||
r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
|
||||
webpage):
|
||||
li = mobj.group('li')
|
||||
if 'fa-youtube-play' not in li:
|
||||
if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li):
|
||||
continue
|
||||
lecture_url = self._search_regex(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
|
||||
|
@@ -12,25 +12,16 @@ from ..utils import (
|
||||
|
||||
|
||||
class TeleQuebecBaseIE(InfoExtractor):
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||
|
||||
@staticmethod
|
||||
def _result(url, ie_key):
|
||||
def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(url, {'geo_countries': ['CA']}),
|
||||
'ie_key': ie_key,
|
||||
'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _limelight_result(media_id):
|
||||
return TeleQuebecBaseIE._result(
|
||||
'limelight:media:' + media_id, 'LimelightMedia')
|
||||
|
||||
@staticmethod
|
||||
def _brightcove_result(brightcove_id):
|
||||
return TeleQuebecBaseIE._result(
|
||||
'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
|
||||
% brightcove_id, 'BrightcoveNew')
|
||||
|
||||
|
||||
class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
@@ -44,14 +35,18 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
# available till 01.01.2023
|
||||
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
|
||||
'info_dict': {
|
||||
'id': '577116881b4b439084e6b1cf4ef8b1b3',
|
||||
'id': '6155972771001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Un petit choc et puis repart!',
|
||||
'description': 'md5:067bc84bd6afecad85e69d1000730907',
|
||||
'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
|
||||
'timestamp': 1589262469,
|
||||
'uploader_id': '6150020952001',
|
||||
'upload_date': '20200512',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
|
||||
'info_dict': {
|
||||
@@ -65,7 +60,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['BrightcoveNew'],
|
||||
}, {
|
||||
@@ -79,25 +73,20 @@ class TeleQuebecIE(TeleQuebecBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
|
||||
media_data = self._download_json(
|
||||
'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
|
||||
media = self._download_json(
|
||||
'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
|
||||
media_id)['media']
|
||||
|
||||
source_id = media_data['streamInfo']['sourceId']
|
||||
source = (try_get(
|
||||
media_data, lambda x: x['streamInfo']['source'],
|
||||
compat_str) or 'limelight').lower()
|
||||
if source == 'brightcove':
|
||||
info = self._brightcove_result(source_id)
|
||||
else:
|
||||
info = self._limelight_result(source_id)
|
||||
source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
|
||||
info = self._brightcove_result(source_id, '22gPKdt7f')
|
||||
product = media.get('product') or {}
|
||||
season = product.get('season') or {}
|
||||
info.update({
|
||||
'title': media_data.get('title'),
|
||||
'description': try_get(
|
||||
media_data, lambda x: x['descriptions'][0]['text'], compat_str),
|
||||
'duration': int_or_none(
|
||||
media_data.get('durationInMilliseconds'), 1000),
|
||||
'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
|
||||
'series': try_get(season, lambda x: x['serie']['titre']),
|
||||
'season': season.get('name'),
|
||||
'season_number': int_or_none(season.get('seasonNo')),
|
||||
'episode': product.get('titre'),
|
||||
'episode_number': int_or_none(product.get('episodeNo')),
|
||||
})
|
||||
return info
|
||||
|
||||
@@ -148,7 +137,7 @@ class TeleQuebecSquatIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
class TeleQuebecEmissionIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
@@ -160,15 +149,16 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
_TESTS = [{
|
||||
'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
|
||||
'info_dict': {
|
||||
'id': '66648a6aef914fe3badda25e81a4d50a',
|
||||
'id': '6154476028001',
|
||||
'ext': 'mp4',
|
||||
'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
|
||||
'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
|
||||
'upload_date': '20171024',
|
||||
'timestamp': 1508862118,
|
||||
'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?',
|
||||
'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
|
||||
'upload_date': '20200505',
|
||||
'timestamp': 1588713424,
|
||||
'uploader_id': '6150020952001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
|
||||
@@ -187,26 +177,26 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
media_id = self._search_regex(
|
||||
r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
|
||||
'limelight id')
|
||||
r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
|
||||
|
||||
info = self._limelight_result(media_id)
|
||||
info.update({
|
||||
'title': self._og_search_title(webpage, default=None),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
})
|
||||
return info
|
||||
return self.url_result(
|
||||
'http://zonevideo.telequebec.tv/media/' + media_id,
|
||||
TeleQuebecIE.ie_key())
|
||||
|
||||
|
||||
class TeleQuebecLiveIE(InfoExtractor):
|
||||
class TeleQuebecLiveIE(TeleQuebecBaseIE):
|
||||
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
|
||||
_TEST = {
|
||||
'url': 'http://zonevideo.telequebec.tv/endirect/',
|
||||
'info_dict': {
|
||||
'id': 'endirect',
|
||||
'id': '6159095684001',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
'description': 'Canal principal de Télé-Québec',
|
||||
'uploader_id': '6150020952001',
|
||||
'timestamp': 1590439901,
|
||||
'upload_date': '20200525',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -214,25 +204,49 @@ class TeleQuebecLiveIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._brightcove_result('6159095684001', 'skCsmi2Uw')
|
||||
|
||||
m3u8_url = None
|
||||
webpage = self._download_webpage(
|
||||
'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
|
||||
fatal=False)
|
||||
if webpage:
|
||||
m3u8_url = self._search_regex(
|
||||
r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'm3u8 url', default=None, group='url')
|
||||
if not m3u8_url:
|
||||
m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title('Télé-Québec - En direct'),
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
class TeleQuebecVideoIE(TeleQuebecBaseIE):
    """Extractor for video.telequebec.tv player pages.

    Asset and stream metadata come from the Brightcove Beacon playback API;
    the result is built with ``_brightcove_result`` and enriched with the
    asset's description and series/episode fields.
    """

    _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.telequebec.tv/player/31110/stream',
        'info_dict': {
            'id': '6202570652001',
            'ext': 'mp4',
            'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
            'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
            'upload_date': '20201019',
            'timestamp': 1603115930,
            'uploader_id': '6101674910001',
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        'url': 'https://video.telequebec.tv/player-live/28527',
        'only_matching': True,
    }]

    def _call_api(self, path, video_id):
        """Download ``assets/<path>`` from the Beacon API and return its ``data`` member."""
        endpoint = 'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path
        reply = self._download_json(
            endpoint, video_id,
            query={'device_layout': 'web', 'device_type': 'web'})
        return reply['data']

    def _real_extract(self, url):
        """Resolve the asset's first stream and return the Brightcove result for it."""
        asset_id = self._match_id(url)
        asset = self._call_api(asset_id, asset_id)['asset']

        # Only the first listed stream of the asset is used.
        stream_path = asset_id + '/streams/' + asset['streams'][0]['id']
        stream = self._call_api(stream_path, asset_id)['stream']

        # stream['url'] is passed on as the first (brightcove id) argument.
        brightcove_ref = stream['url']
        account_id = try_get(
            stream, lambda x: x['video_provider_details']['account_id'])
        if not account_id:
            account_id = '6101674910001'

        result = self._brightcove_result(brightcove_ref, 'default', account_id)
        result.update({
            'description': asset.get('long_description') or asset.get('short_description'),
            'series': asset.get('series_original_name'),
            'season_number': int_or_none(asset.get('season_number')),
            'episode': asset.get('original_name'),
            'episode_number': int_or_none(asset.get('episode_number')),
        })
        return result
|
||||
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
compat_str,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
@@ -13,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class TikTokBaseIE(InfoExtractor):
|
||||
def _extract_aweme(self, data):
|
||||
def _extract_video(self, data, video_id=None):
|
||||
video = data['video']
|
||||
description = str_or_none(try_get(data, lambda x: x['desc']))
|
||||
width = int_or_none(try_get(data, lambda x: video['width']))
|
||||
@@ -21,43 +22,54 @@ class TikTokBaseIE(InfoExtractor):
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
for format_id in (
|
||||
'play_addr_lowbr', 'play_addr', 'play_addr_h264',
|
||||
'download_addr'):
|
||||
for format in try_get(
|
||||
video, lambda x: x[format_id]['url_list'], list) or []:
|
||||
format_url = url_or_none(format)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_url in format_urls:
|
||||
continue
|
||||
format_urls.add(format_url)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4',
|
||||
'height': height,
|
||||
'width': width,
|
||||
})
|
||||
for format_id in ('download', 'play'):
|
||||
format_url = url_or_none(video.get('%sAddr' % format_id))
|
||||
if not format_url:
|
||||
continue
|
||||
if format_url in format_urls:
|
||||
continue
|
||||
format_urls.add(format_url)
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'ext': 'mp4',
|
||||
'height': height,
|
||||
'width': width,
|
||||
'http_headers': {
|
||||
'Referer': 'https://www.tiktok.com/',
|
||||
}
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = url_or_none(try_get(
|
||||
video, lambda x: x['cover']['url_list'][0], compat_str))
|
||||
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
|
||||
timestamp = int_or_none(data.get('create_time'))
|
||||
comment_count = int_or_none(data.get('comment_count')) or int_or_none(
|
||||
try_get(data, lambda x: x['statistics']['comment_count']))
|
||||
repost_count = int_or_none(try_get(
|
||||
data, lambda x: x['statistics']['share_count']))
|
||||
thumbnail = url_or_none(video.get('cover'))
|
||||
duration = float_or_none(video.get('duration'))
|
||||
|
||||
aweme_id = data['aweme_id']
|
||||
uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
|
||||
uploader_id = try_get(data, lambda x: x['author']['id'], compat_str)
|
||||
|
||||
timestamp = int_or_none(data.get('createTime'))
|
||||
|
||||
def stats(key):
|
||||
return int_or_none(try_get(
|
||||
data, lambda x: x['stats']['%sCount' % key]))
|
||||
|
||||
view_count = stats('play')
|
||||
like_count = stats('digg')
|
||||
comment_count = stats('comment')
|
||||
repost_count = stats('share')
|
||||
|
||||
aweme_id = data.get('id') or video_id
|
||||
|
||||
return {
|
||||
'id': aweme_id,
|
||||
'title': uploader or aweme_id,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'repost_count': repost_count,
|
||||
'formats': formats,
|
||||
@@ -65,62 +77,56 @@ class TikTokBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class TikTokIE(TikTokBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:m\.)?tiktok\.com/v|
|
||||
(?:www\.)?tiktok\.com/share/video
|
||||
)
|
||||
/(?P<id>\d+)
|
||||
'''
|
||||
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@[^/]+/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://m.tiktok.com/v/6606727368545406213.html',
|
||||
'md5': 'd584b572e92fcd48888051f238022420',
|
||||
'url': 'https://www.tiktok.com/@zureeal/video/6606727368545406213',
|
||||
'md5': '163ceff303bb52de60e6887fe399e6cd',
|
||||
'info_dict': {
|
||||
'id': '6606727368545406213',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zureeal',
|
||||
'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
|
||||
'thumbnail': r're:^https?://.*~noop.image',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 15,
|
||||
'uploader': 'Zureeal',
|
||||
'uploader_id': '188294915489964032',
|
||||
'timestamp': 1538248586,
|
||||
'upload_date': '20180929',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.tiktok.com/share/video/6606727368545406213',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
# Setup session (will set necessary cookies)
|
||||
self._request_webpage(
|
||||
'https://www.tiktok.com/', None, note='Setting up session')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'https://m.tiktok.com/v/%s.html' % video_id, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
|
||||
return self._extract_aweme(data)
|
||||
r'<script[^>]+\bid=["\']__NEXT_DATA__[^>]+>\s*({.+?})\s*</script',
|
||||
webpage, 'data'), video_id)['props']['pageProps']['itemInfo']['itemStruct']
|
||||
return self._extract_video(data, video_id)
|
||||
|
||||
|
||||
class TikTokUserIE(TikTokBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:m\.)?tiktok\.com/h5/share/usr|
|
||||
(?:www\.)?tiktok\.com/share/user
|
||||
)
|
||||
/(?P<id>\d+)
|
||||
'''
|
||||
_VALID_URL = r'https://(?:www\.)?tiktok\.com/@(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
|
||||
'url': 'https://www.tiktok.com/@zureeal',
|
||||
'info_dict': {
|
||||
'id': '188294915489964032',
|
||||
},
|
||||
'playlist_mincount': 24,
|
||||
}, {
|
||||
'url': 'https://www.tiktok.com/share/user/188294915489964032',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WORKING = False
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if TikTokIE.suitable(url) else super(TikTokUserIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
@@ -130,7 +136,7 @@ class TikTokUserIE(TikTokBaseIE):
|
||||
entries = []
|
||||
for aweme in data['aweme_list']:
|
||||
try:
|
||||
entry = self._extract_aweme(aweme)
|
||||
entry = self._extract_video(aweme)
|
||||
except ExtractorError:
|
||||
continue
|
||||
entry['extractor_key'] = TikTokIE.ie_key()
|
||||
|
@@ -11,13 +11,13 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ToggleIE(InfoExtractor):
|
||||
IE_NAME = 'toggle'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
|
||||
'info_dict': {
|
||||
@@ -84,28 +84,12 @@ class ToggleIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_PREFERENCES = {
|
||||
'wvm-STBMain': -10,
|
||||
'wvm-iPadMain': -20,
|
||||
'wvm-iPhoneMain': -30,
|
||||
'wvm-Android': -40,
|
||||
}
|
||||
_API_USER = 'tvpapi_147'
|
||||
_API_PASS = '11111'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, note='Downloading video page')
|
||||
|
||||
api_user = self._search_regex(
|
||||
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
|
||||
default=self._API_USER, group='user')
|
||||
api_pass = self._search_regex(
|
||||
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
|
||||
default=self._API_PASS, group='pass')
|
||||
|
||||
params = {
|
||||
'initObj': {
|
||||
'Locale': {
|
||||
@@ -118,17 +102,16 @@ class ToggleIE(InfoExtractor):
|
||||
'SiteGuid': 0,
|
||||
'DomainID': '0',
|
||||
'UDID': '',
|
||||
'ApiUser': api_user,
|
||||
'ApiPass': api_pass
|
||||
'ApiUser': self._API_USER,
|
||||
'ApiPass': self._API_PASS
|
||||
},
|
||||
'MediaID': video_id,
|
||||
'mediaType': 0,
|
||||
}
|
||||
|
||||
req = sanitized_Request(
|
||||
info = self._download_json(
|
||||
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
|
||||
json.dumps(params).encode('utf-8'))
|
||||
info = self._download_json(req, video_id, 'Downloading video info json')
|
||||
video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
|
||||
|
||||
title = info['MediaName']
|
||||
|
||||
@@ -141,11 +124,16 @@ class ToggleIE(InfoExtractor):
|
||||
vid_format = vid_format.replace(' ', '')
|
||||
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
video_url, video_id, ext='mp4', m3u8_id=vid_format,
|
||||
note='Downloading %s m3u8 information' % vid_format,
|
||||
errnote='Failed to download %s m3u8 information' % vid_format,
|
||||
fatal=False))
|
||||
fatal=False)
|
||||
for f in m3u8_formats:
|
||||
# Apple FairPlay Streaming
|
||||
if '/fpshls/' in f['url']:
|
||||
continue
|
||||
formats.append(f)
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id=vid_format,
|
||||
@@ -158,28 +146,21 @@ class ToggleIE(InfoExtractor):
|
||||
note='Downloading %s ISM manifest' % vid_format,
|
||||
errnote='Failed to download %s ISM manifest' % vid_format,
|
||||
fatal=False))
|
||||
elif ext in ('mp4', 'wvm'):
|
||||
# wvm are drm-protected files
|
||||
elif ext == 'mp4':
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'url': video_url,
|
||||
'format_id': vid_format,
|
||||
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
|
||||
'format_note': 'DRM-protected video' if ext == 'wvm' else None
|
||||
})
|
||||
if not formats:
|
||||
for meta in (info.get('Metas') or []):
|
||||
if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
|
||||
raise ExtractorError(
|
||||
'This video is DRM protected.', expected=True)
|
||||
# Most likely because geo-blocked
|
||||
raise ExtractorError('No downloadable videos found', expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(info.get('Duration'))
|
||||
description = info.get('Description')
|
||||
created_at = parse_iso8601(info.get('CreationDate') or None)
|
||||
|
||||
average_rating = float_or_none(info.get('Rating'))
|
||||
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
|
||||
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
|
||||
|
||||
thumbnails = []
|
||||
for picture in info.get('Pictures', []):
|
||||
if not isinstance(picture, dict):
|
||||
@@ -199,15 +180,46 @@ class ToggleIE(InfoExtractor):
|
||||
})
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
def counter(prefix):
|
||||
return int_or_none(
|
||||
info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': created_at,
|
||||
'average_rating': average_rating,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'description': strip_or_none(info.get('Description')),
|
||||
'duration': int_or_none(info.get('Duration')),
|
||||
'timestamp': parse_iso8601(info.get('CreationDate') or None),
|
||||
'average_rating': float_or_none(info.get('Rating')),
|
||||
'view_count': counter('View'),
|
||||
'like_count': counter('Like'),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class MeWatchIE(InfoExtractor):
    """Resolve a mewatch.sg watch page to a ``toggle:`` URL.

    The numeric id at the end of the page URL is looked up through the
    mewatch items API; the ``customId`` of that item is then handed over to
    ToggleIE, which performs the actual extraction.
    """

    IE_NAME = 'mewatch'
    _VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
        'info_dict': {
            'id': '1008625',
            'ext': 'mp4',
            'title': 'Recipe Of Life 味之道',
            'timestamp': 1603306526,
            'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
            'upload_date': '20201021',
        },
        'params': {
            'skip_download': 'm3u8 download',
        },
    }]

    def _real_extract(self, url):
        """Return a URL result that points ToggleIE at the item's custom id."""
        item_id = self._match_id(url)

        # A missing 'customId' key is deliberately fatal (KeyError): without
        # it there is nothing to pass on to ToggleIE.
        api_url = 'https://cdn.mewatch.sg/api/items/' + item_id
        item = self._download_json(
            api_url, item_id, query={'segments': 'all'})
        custom_id = item['customId']

        toggle_url = 'toggle:' + custom_id
        return self.url_result(toggle_url, ToggleIE.ie_key(), custom_id)
|
||||
|
@@ -33,6 +33,19 @@ class TubiTvIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://tubitv.com/movies/383676/tracker',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
|
||||
'info_dict': {
|
||||
'id': '560057',
|
||||
'ext': 'mp4',
|
||||
'title': 'Penitentiary',
|
||||
'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9',
|
||||
'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
|
||||
'release_year': 1979,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
@@ -93,4 +106,5 @@ class TubiTvIE(InfoExtractor):
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'uploader_id': video_data.get('publisher_id'),
|
||||
'release_year': int_or_none(video_data.get('year')),
|
||||
}
|
||||
|
@@ -4,7 +4,9 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,7 +25,8 @@ class TVAIE(InfoExtractor):
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'HTTP Error 404: Not Found',
|
||||
}, {
|
||||
'url': 'https://video.tva.ca/details/_5596811470001',
|
||||
'only_matching': True,
|
||||
@@ -32,26 +35,54 @@ class TVAIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
}, query={
|
||||
'appId': '5955fc5f23eec60006c951f1',
|
||||
})
|
||||
|
||||
def get_attribute(key):
|
||||
for attribute in video_data.get('attributes', []):
|
||||
if attribute.get('key') == key:
|
||||
return attribute.get('value')
|
||||
return None
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': get_attribute('title'),
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
|
||||
'description': get_attribute('description'),
|
||||
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
|
||||
'duration': float_or_none(get_attribute('video-duration'), 1000),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
|
||||
class QubIE(InfoExtractor):
    """Extractor for qub.ca pages whose URL ends in a numeric entity id.

    The entity is looked up through the qub.ca content-delivery proxy and
    returned as a ``url_transparent`` entry that TVAIE resolves.
    """

    _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
        'md5': '949490fd0e7aee11d0543777611fbd53',
        'info_dict': {
            'id': '6084352463001',
            'ext': 'mp4',
            'title': 'Épisode 01',
            'uploader_id': '5481942443001',
            'upload_date': '20190907',
            'timestamp': 1567899756,
            'description': 'md5:9c0d7fbb90939420c651fd977df90145',
        },
    }, {
        'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
        'only_matching': True,
    }]
    # Alternative route kept for reference (not used): go straight to
    # Brightcove by reference id.
    # reference_id also works with old account_id(5481942443001)
    # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'

    def _real_extract(self, url):
        """Look up the entity behind ``url`` and delegate playback to TVAIE."""
        entity_id = self._match_id(url)
        entity = self._download_json(
            'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
            entity_id, query={'id': entity_id})

        # 'videoId' is mandatory; the remaining fields are optional metadata.
        video_id = entity['videoId']
        episode = strip_or_none(entity.get('name'))

        info = {
            '_type': 'url_transparent',
            'id': video_id,
            # The entity name serves both as title and as episode name.
            'title': episode,
        }
        # Brightcove alternative, see the class-level note:
        # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
        # 'ie_key': 'BrightcoveNew',
        info['url'] = 'https://videos.tva.ca/details/_' + video_id
        info['description'] = entity.get('longDescription')
        # 'durationMillis' is divided by 1000 to obtain seconds.
        info['duration'] = float_or_none(entity.get('durationMillis'), 1000)
        info['episode'] = episode
        info['episode_number'] = int_or_none(entity.get('episodeNumber'))
        info['ie_key'] = TVAIE.ie_key()
        return info
|
||||
|
67
youtube_dl/extractor/tver.py
Normal file
67
youtube_dl/extractor/tver.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TVerIE(InfoExtractor):
    """Extractor for tver.jp corner/episode/feature pages.

    The page metadata is fetched from the TVer API and the result is returned
    as a ``url_transparent`` entry that is resolved by either the
    FujiTVFODPlus7 or the BrightcoveNew extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
    # videos are only available for 7 days
    _TESTS = [{
        'url': 'https://tver.jp/corner/f0062178',
        'only_matching': True,
    }, {
        'url': 'https://tver.jp/feature/f0062413',
        'only_matching': True,
    }, {
        'url': 'https://tver.jp/episode/79622438',
        'only_matching': True,
    }]
    _TOKEN = None
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'

    def _real_initialize(self):
        """Fetch the API access token that metadata requests send along."""
        token_data = self._download_json(
            'https://tver.jp/api/access_token.php', None)
        self._TOKEN = token_data['token']

    def _real_extract(self, url):
        """Build the ``url_transparent`` info dict for the page at ``url``."""
        mobj = re.match(self._VALID_URL, url)
        path, video_id = mobj.group('path'), mobj.group('id')

        response = self._download_json(
            'https://api.tver.jp/v4/' + path, video_id,
            query={'token': self._TOKEN})
        main = response['main']
        publisher_id = main['publisher_id']
        # The service name is compared without its optional 'ts_' prefix.
        service = remove_start(main['service'], 'ts_')

        description = try_get(
            main, lambda x: x['note'][0]['text'], compat_str)
        episode_number = int_or_none(
            try_get(main, lambda x: x['ext']['episode_number']))
        info = {
            '_type': 'url_transparent',
            'description': description,
            'episode_number': episode_number,
        }

        if service == 'cx':
            # Content of the 'cx' service is delegated to FujiTVFODPlus7.
            info['title'] = main.get('subtitle') or main['title']
            info['url'] = 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (
                publisher_id[:4], publisher_id)
            info['ie_key'] = 'FujiTVFODPlus7'
            return info

        # Everything else goes to Brightcove. The reference id gets a 'ref:'
        # prefix for every service except the ones listed here.
        reference_id = main['reference_id']
        if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
            reference_id = 'ref:' + reference_id
        info['url'] = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % (publisher_id, reference_id),
            {'geo_countries': ['JP']})
        info['ie_key'] = 'BrightcoveNew'
        return info
|
@@ -12,11 +12,13 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -414,7 +416,7 @@ class ViafreeIE(InfoExtractor):
|
||||
|
||||
|
||||
class TVPlayHomeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
|
||||
'info_dict': {
|
||||
@@ -433,80 +435,58 @@ class TVPlayHomeIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TVPlayIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.tv3.lt/aferistai-10047125',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
asset = self._download_json(
|
||||
urljoin(url, '/sb/public/asset/' + video_id), video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
|
||||
|
||||
if len(video_id) < 8:
|
||||
return self.url_result(
|
||||
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'm3u8 url', group='url')
|
||||
m3u8_url = asset['movie']['contentUrl']
|
||||
video_id = asset['assetId']
|
||||
asset_title = asset['title']
|
||||
title = asset_title['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'title', default=None, group='value') or self._html_search_meta(
|
||||
'title', webpage, default=None) or self._og_search_title(
|
||||
webpage)
|
||||
thumbnails = None
|
||||
image_url = asset.get('imageUrl')
|
||||
if image_url:
|
||||
thumbnails = [{
|
||||
'url': urljoin(url, image_url),
|
||||
'ext': 'jpg',
|
||||
}]
|
||||
|
||||
description = self._html_search_meta(
|
||||
'description', webpage,
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', default=None, group='url') or self._html_search_meta(
|
||||
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
|
||||
webpage)
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
season = self._search_regex(
|
||||
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
|
||||
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'season', default=None, group='value')
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
|
||||
default=None))
|
||||
episode = self._search_regex(
|
||||
(r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'episode', default=None, group='value')
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
|
||||
default=None))
|
||||
metadata = asset.get('metadata') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': parse_duration(asset_title.get('runTime')),
|
||||
'series': asset.get('tvSeriesTitle'),
|
||||
'season': asset.get('tvSeasonTitle'),
|
||||
'season_number': int_or_none(metadata.get('seasonNumber')),
|
||||
'episode': asset_title.get('titleBrief'),
|
||||
'episode_number': int_or_none(metadata.get('episodeNumber')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,11 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class TwitCastingIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
|
||||
@@ -17,8 +26,12 @@ class TwitCastingIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live #2357609',
|
||||
'uploader_id': 'ivetesangalo',
|
||||
'description': "Moi! I'm live on TwitCasting from my iPhone.",
|
||||
'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20110822',
|
||||
'timestamp': 1314010824,
|
||||
'duration': 32,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -30,8 +43,12 @@ class TwitCastingIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Live playing something #3689740',
|
||||
'uploader_id': 'mttbernardini',
|
||||
'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",
|
||||
'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20120212',
|
||||
'timestamp': 1329028024,
|
||||
'duration': 681,
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -40,9 +57,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
uploader_id = mobj.group('uploader_id')
|
||||
uploader_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
video_password = self._downloader.params.get('videopassword')
|
||||
request_data = None
|
||||
@@ -52,30 +67,45 @@ class TwitCastingIE(InfoExtractor):
|
||||
})
|
||||
webpage = self._download_webpage(url, video_id, data=request_data)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, fatal=True)
|
||||
title = clean_html(get_element_by_id(
|
||||
'movietitle', webpage)) or self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, fatal=True)
|
||||
|
||||
video_js_data = {}
|
||||
m3u8_url = self._search_regex(
|
||||
(r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
|
||||
webpage, 'm3u8 url', group='url')
|
||||
r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
video_js_data = self._parse_json(self._search_regex(
|
||||
r"data-movie-playlist='(\[[^']+\])'",
|
||||
webpage, 'movie playlist'), video_id)[0]
|
||||
m3u8_url = video_js_data['source']['url']
|
||||
|
||||
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage)
|
||||
thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
|
||||
description = clean_html(get_element_by_id(
|
||||
'authorcomment', webpage)) or self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'], webpage)
|
||||
duration = float_or_none(video_js_data.get(
|
||||
'duration'), 1000) or parse_duration(clean_html(
|
||||
get_element_by_class('tw-player-duration-time', webpage)))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None))
|
||||
timestamp = unified_timestamp(self._search_regex(
|
||||
r'data-toggle="true"[^>]+datetime="([^"]+)"',
|
||||
webpage, 'datetime', None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader_id': uploader_id,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,16 +1,25 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_codecs,
|
||||
update_url_query,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_ord,
|
||||
compat_struct_pack,
|
||||
)
|
||||
|
||||
|
||||
class VideaIE(InfoExtractor):
|
||||
@@ -19,7 +28,7 @@ class VideaIE(InfoExtractor):
|
||||
videa(?:kid)?\.hu/
|
||||
(?:
|
||||
videok/(?:[^/]+/)*[^?#&]+-|
|
||||
player\?.*?\bv=|
|
||||
(?:videojs_)?player\?.*?\bv=|
|
||||
player/v/
|
||||
)
|
||||
(?P<id>[^?#&]+)
|
||||
@@ -53,6 +62,7 @@ class VideaIE(InfoExtractor):
|
||||
'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
@@ -60,26 +70,84 @@ class VideaIE(InfoExtractor):
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
|
||||
webpage)]
|
||||
|
||||
@staticmethod
|
||||
def rc4(cipher_text, key):
|
||||
res = b''
|
||||
|
||||
key_len = len(key)
|
||||
S = list(range(256))
|
||||
|
||||
j = 0
|
||||
for i in range(256):
|
||||
j = (j + S[i] + ord(key[i % key_len])) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
|
||||
i = 0
|
||||
j = 0
|
||||
for m in range(len(cipher_text)):
|
||||
i = (i + 1) % 256
|
||||
j = (j + S[i]) % 256
|
||||
S[i], S[j] = S[j], S[i]
|
||||
k = S[(S[i] + S[j]) % 256]
|
||||
res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
|
||||
|
||||
return res.decode()
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
query = {'v': video_id}
|
||||
player_page = self._download_webpage(
|
||||
'https://videa.hu/player', video_id, query=query)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://videa.hu/videaplayer_get_xml.php', video_id,
|
||||
query={'v': video_id})
|
||||
nonce = self._search_regex(
|
||||
r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
|
||||
l = nonce[:32]
|
||||
s = nonce[32:]
|
||||
result = ''
|
||||
for i in range(0, 32):
|
||||
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
|
||||
|
||||
video = xpath_element(info, './/video', 'video', fatal=True)
|
||||
sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
|
||||
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
|
||||
query['_s'] = random_seed
|
||||
query['_t'] = result[:16]
|
||||
|
||||
b64_info, handle = self._download_webpage_handle(
|
||||
'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
|
||||
if b64_info.startswith('<?xml'):
|
||||
info = self._parse_xml(b64_info, video_id)
|
||||
else:
|
||||
key = result[16:] + random_seed + handle.headers['x-videa-xs']
|
||||
info = self._parse_xml(self.rc4(
|
||||
compat_b64decode(b64_info), key), video_id)
|
||||
|
||||
video = xpath_element(info, './video', 'video')
|
||||
if not video:
|
||||
raise ExtractorError(xpath_element(
|
||||
info, './error', fatal=True), expected=True)
|
||||
sources = xpath_element(
|
||||
info, './video_sources', 'sources', fatal=True)
|
||||
hash_values = xpath_element(
|
||||
info, './hash_values', 'hash values', fatal=True)
|
||||
|
||||
title = xpath_text(video, './title', fatal=True)
|
||||
|
||||
formats = []
|
||||
for source in sources.findall('./video_source'):
|
||||
source_url = source.text
|
||||
if not source_url:
|
||||
source_name = source.get('name')
|
||||
source_exp = source.get('exp')
|
||||
if not (source_url and source_name and source_exp):
|
||||
continue
|
||||
hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
|
||||
if not hash_value:
|
||||
continue
|
||||
source_url = update_url_query(source_url, {
|
||||
'md5': hash_value,
|
||||
'expires': source_exp,
|
||||
})
|
||||
f = parse_codecs(source.get('codecs'))
|
||||
f.update({
|
||||
'url': source_url,
|
||||
'url': self._proto_relative_url(source_url),
|
||||
'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
|
||||
'format_id': source.get('name'),
|
||||
'width': int_or_none(source.get('width')),
|
||||
@@ -88,8 +156,7 @@ class VideaIE(InfoExtractor):
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = xpath_text(video, './poster_src')
|
||||
duration = int_or_none(xpath_text(video, './duration'))
|
||||
thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
|
||||
|
||||
age_limit = None
|
||||
is_adult = xpath_text(video, './is_adult_content', default=None)
|
||||
@@ -100,7 +167,7 @@ class VideaIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'duration': int_or_none(xpath_text(video, './duration')),
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -13,6 +13,8 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
@@ -66,6 +68,10 @@ class VLiveIE(VLiveBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.vlive.tv/embed/1326',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# works only with gcc=KR
|
||||
'url': 'https://www.vlive.tv/video/225019',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
@@ -100,26 +106,26 @@ class VLiveIE(VLiveBaseIE):
|
||||
raise ExtractorError('Unable to log in', expected=True)
|
||||
|
||||
def _call_api(self, path_template, video_id, fields=None):
|
||||
query = {'appId': self._APP_ID}
|
||||
query = {'appId': self._APP_ID, 'gcc': 'KR'}
|
||||
if fields:
|
||||
query['fields'] = fields
|
||||
return self._download_json(
|
||||
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
|
||||
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
|
||||
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
post = self._call_api(
|
||||
'post/v1.0/officialVideoPost-%s', video_id,
|
||||
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
|
||||
return self._download_json(
|
||||
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
|
||||
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
|
||||
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
|
||||
raise
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
post = self._call_api(
|
||||
'post/v1.0/officialVideoPost-%s', video_id,
|
||||
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
|
||||
|
||||
video = post['officialVideo']
|
||||
|
||||
def get_common_fields():
|
||||
@@ -149,6 +155,7 @@ class VLiveIE(VLiveBaseIE):
|
||||
'old/v3/live/%s/playInfo',
|
||||
video_id)['result']['adaptiveStreamUrl']
|
||||
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
info = get_common_fields()
|
||||
info.update({
|
||||
'title': self._live_title(video['title']),
|
||||
@@ -170,6 +177,83 @@ class VLiveIE(VLiveBaseIE):
|
||||
raise ExtractorError('Unknown status ' + status)
|
||||
|
||||
|
||||
class VLivePostIE(VLiveIE):
    """Extractor for vlive.tv post pages.

    A post that wraps an official video is redirected to VLiveIE; otherwise
    every video attachment of the post becomes one playlist entry.
    """

    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
        # uploadType = SOS
        'url': 'https://www.vlive.tv/post/1-20088044',
        'info_dict': {
            'id': '1-20088044',
            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
        },
        'playlist_count': 3,
    }, {
        # uploadType = V
        'url': 'https://www.vlive.tv/post/1-20087926',
        'info_dict': {
            'id': '1-20087926',
            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
        },
        'playlist_count': 1,
    }]
    # API path templates; the doubled '%%s' survives the first formatting
    # step and is filled in later with the video id.
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'

    def _real_extract(self, url):
        """Return a redirect to the official video or a playlist of attachments."""
        post_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/post-%s', post_id,
            'attachments{video},officialVideo{videoSeq},plainBody,title')

        # Posts backed by an official video are handled by VLiveIE.
        official_seq = str_or_none(try_get(
            post, lambda x: x['officialVideo']['videoSeq']))
        if official_seq:
            return self.url_result(
                'http://www.vlive.tv/video/' + official_seq,
                VLiveIE.ie_key(), official_seq)

        title = post['title']
        entries = []
        attachments = post['attachments']['video'].values()
        for part_no, attachment in enumerate(attachments):
            video_id = attachment.get('videoId')
            if not video_id:
                continue
            upload_type = attachment.get('uploadType')
            upload_info = attachment.get('uploadInfo') or {}

            if upload_type == 'SOS':
                download = self._call_api(
                    self._SOS_TMPL, video_id)['videoUrl']['download']
                # The format id minus its last character is parsed as the
                # height.
                formats = [{
                    'format_id': format_id,
                    'url': format_url,
                    'height': int_or_none(format_id[:-1]),
                } for format_id, format_url in download.items()]
                self._sort_formats(formats)
                entry = {
                    'formats': formats,
                    'id': video_id,
                    'thumbnail': upload_info.get('imageUrl'),
                }
            elif upload_type == 'V':
                vod_id = upload_info.get('videoId')
                if not vod_id:
                    continue
                inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
                # _extract_video_info is not defined in this class; it is
                # presumably inherited from the VLive base classes.
                entry = self._extract_video_info(video_id, vod_id, inkey)
            else:
                # Other upload types produce no entry.
                continue

            if not entry:
                continue
            # part_no is the attachment's position in the post, so skipped
            # attachments leave gaps in the numbering.
            entry['title'] = '%s_part%s' % (title, part_no)
            entries.append(entry)

        description = strip_or_none(post.get('plainBody'))
        return self.playlist_result(entries, post_id, title, description)
|
||||
|
||||
|
||||
class VLiveChannelIE(VLiveBaseIE):
|
||||
IE_NAME = 'vlive:channel'
|
||||
_VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
|
||||
|
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
urlhandle_detect_ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,16 +43,20 @@ class WDRIE(InfoExtractor):
|
||||
is_live = metadata.get('mediaType') == 'live'
|
||||
|
||||
tracker_data = metadata['trackerData']
|
||||
title = tracker_data['trackerClipTitle']
|
||||
|
||||
media_resource = metadata['mediaResource']
|
||||
|
||||
formats = []
|
||||
|
||||
# check if the metadata contains a direct URL to a file
|
||||
for kind, media_resource in media_resource.items():
|
||||
for kind, media in media_resource.items():
|
||||
if not isinstance(media, dict):
|
||||
continue
|
||||
if kind not in ('dflt', 'alt'):
|
||||
continue
|
||||
|
||||
for tag_name, medium_url in media_resource.items():
|
||||
for tag_name, medium_url in media.items():
|
||||
if tag_name not in ('videoURL', 'audioURL'):
|
||||
continue
|
||||
|
||||
@@ -88,8 +93,16 @@ class WDRIE(InfoExtractor):
|
||||
'url': caption_url,
|
||||
'ext': 'ttml',
|
||||
}]
|
||||
|
||||
title = tracker_data['trackerClipTitle']
|
||||
captions_hash = media_resource.get('captionsHash')
|
||||
if isinstance(captions_hash, dict):
|
||||
for ext, format_url in captions_hash.items():
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
subtitles.setdefault('de', []).append({
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_url, None) or ext,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': tracker_data.get('trackerClipId', video_id),
|
||||
@@ -105,7 +118,7 @@ class WDRIE(InfoExtractor):
|
||||
class WDRPageIE(InfoExtractor):
|
||||
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
||||
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -212,7 +225,11 @@ class WDRPageIE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
||||
'only_matching': True,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -46,57 +46,69 @@ class YandexMusicBaseIE(InfoExtractor):
|
||||
self._handle_error(response)
|
||||
return response
|
||||
|
||||
def _call_api(self, ep, tld, url, item_id, note, query):
    """Query a music.yandex ``handlers/<ep>.jsx`` endpoint and return its JSON.

    :param ep: handler name inserted into the endpoint path.
    :param tld: top-level domain of the music.yandex host to query.
    :param url: page URL, sent as both ``Referer`` and ``X-Retpath-Y``.
    :param item_id: id passed to the downloader for this request.
    :param note: progress note passed to the downloader.
    :param query: query-string parameters for the request.
    :return: whatever ``_download_json`` returns; the request is made with
        ``fatal=False``, so callers should be prepared for a failed download
        not to raise.
    """
    endpoint = 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep)
    request_headers = {
        'Referer': url,
        'X-Requested-With': 'XMLHttpRequest',
        'X-Retpath-Y': url,
    }
    return self._download_json(
        endpoint, item_id, note,
        fatal=False, headers=request_headers, query=query)
|
||||
|
||||
|
||||
class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
IE_NAME = 'yandexmusic:track'
|
||||
IE_DESC = 'Яндекс.Музыка - Трек'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://music.yandex.ru/album/540508/track/4878838',
|
||||
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
|
||||
'md5': 'dec8b661f12027ceaba33318787fff76',
|
||||
'info_dict': {
|
||||
'id': '4878838',
|
||||
'ext': 'mp3',
|
||||
'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
|
||||
'filesize': 4628061,
|
||||
'title': 'md5:c63e19341fdbe84e43425a30bc777856',
|
||||
'filesize': int,
|
||||
'duration': 193.04,
|
||||
'track': 'Gypsy Eyes 1',
|
||||
'album': 'Gypsy Soul',
|
||||
'album_artist': 'Carlo Ambrosio',
|
||||
'artist': 'Carlo Ambrosio & Fabio Di Bari',
|
||||
'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
|
||||
'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
|
||||
'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
|
||||
'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
|
||||
'release_year': 2009,
|
||||
},
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}, {
|
||||
# multiple disks
|
||||
'url': 'http://music.yandex.ru/album/3840501/track/705105',
|
||||
'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
|
||||
'md5': '82a54e9e787301dd45aba093cf6e58c0',
|
||||
'info_dict': {
|
||||
'id': '705105',
|
||||
'ext': 'mp3',
|
||||
'title': 'Hooverphonic - Sometimes',
|
||||
'filesize': 5743386,
|
||||
'title': 'md5:f86d4a9188279860a83000277024c1a6',
|
||||
'filesize': int,
|
||||
'duration': 239.27,
|
||||
'track': 'Sometimes',
|
||||
'album': 'The Best of Hooverphonic',
|
||||
'album_artist': 'Hooverphonic',
|
||||
'artist': 'Hooverphonic',
|
||||
'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
|
||||
'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
|
||||
'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
|
||||
'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
|
||||
'release_year': 2016,
|
||||
'genre': 'pop',
|
||||
'disc_number': 2,
|
||||
'track_number': 9,
|
||||
},
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
album_id, track_id = mobj.group('album_id'), mobj.group('id')
|
||||
tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
|
||||
|
||||
track = self._download_json(
|
||||
'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
|
||||
track_id, 'Downloading track JSON')['track']
|
||||
track = self._call_api(
|
||||
'track', tld, url, track_id, 'Downloading track JSON',
|
||||
{'track': '%s:%s' % (track_id, album_id)})['track']
|
||||
track_title = track['title']
|
||||
|
||||
download_data = self._download_json(
|
||||
@@ -109,8 +121,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
'Downloading track location JSON',
|
||||
query={'format': 'json'})
|
||||
key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
|
||||
storage = track['storageDir'].split('.')
|
||||
f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
|
||||
f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
|
||||
|
||||
thumbnail = None
|
||||
cover_uri = track.get('albums', [{}])[0].get('coverUri')
|
||||
@@ -180,46 +191,100 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
|
||||
|
||||
|
||||
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
|
||||
def _extract_tracks(self, source, item_id, url, tld):
|
||||
tracks = source['tracks']
|
||||
track_ids = [compat_str(track_id) for track_id in source['trackIds']]
|
||||
|
||||
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
|
||||
# missing tracks should be retrieved manually.
|
||||
if len(tracks) < len(track_ids):
|
||||
present_track_ids = set([
|
||||
compat_str(track['id'])
|
||||
for track in tracks if track.get('id')])
|
||||
missing_track_ids = [
|
||||
track_id for track_id in track_ids
|
||||
if track_id not in present_track_ids]
|
||||
missing_tracks = self._call_api(
|
||||
'track-entries', tld, url, item_id,
|
||||
'Downloading missing tracks JSON', {
|
||||
'entries': ','.join(missing_track_ids),
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
'strict': 'true',
|
||||
})
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
|
||||
return tracks
|
||||
|
||||
def _build_playlist(self, tracks):
|
||||
return [
|
||||
self.url_result(
|
||||
'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
|
||||
for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
|
||||
entries = []
|
||||
for track in tracks:
|
||||
track_id = track.get('id') or track.get('realId')
|
||||
if not track_id:
|
||||
continue
|
||||
albums = track.get('albums')
|
||||
if not albums or not isinstance(albums, list):
|
||||
continue
|
||||
album = albums[0]
|
||||
if not isinstance(album, dict):
|
||||
continue
|
||||
album_id = album.get('id')
|
||||
if not album_id:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
|
||||
ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
|
||||
return entries
|
||||
|
||||
|
||||
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
|
||||
IE_NAME = 'yandexmusic:album'
|
||||
IE_DESC = 'Яндекс.Музыка - Альбом'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://music.yandex.ru/album/540508',
|
||||
'info_dict': {
|
||||
'id': '540508',
|
||||
'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
|
||||
'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
|
||||
},
|
||||
'playlist_count': 50,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}, {
|
||||
'url': 'https://music.yandex.ru/album/3840501',
|
||||
'info_dict': {
|
||||
'id': '3840501',
|
||||
'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
|
||||
'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
|
||||
},
|
||||
'playlist_count': 33,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}, {
|
||||
# empty artists
|
||||
'url': 'https://music.yandex.ru/album/9091882',
|
||||
'info_dict': {
|
||||
'id': '9091882',
|
||||
'title': 'ТЕД на русском',
|
||||
},
|
||||
'playlist_count': 187,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
tld = mobj.group('tld')
|
||||
album_id = mobj.group('id')
|
||||
|
||||
album = self._download_json(
|
||||
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
|
||||
album_id, 'Downloading album JSON')
|
||||
album = self._call_api(
|
||||
'album', tld, url, album_id, 'Downloading album JSON',
|
||||
{'album': album_id})
|
||||
|
||||
entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
|
||||
|
||||
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
|
||||
title = album['title']
|
||||
artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
|
||||
if artist:
|
||||
title = '%s - %s' % (artist, title)
|
||||
year = album.get('year')
|
||||
if year:
|
||||
title += ' (%s)' % year
|
||||
@@ -236,21 +301,24 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
|
||||
'info_dict': {
|
||||
'id': '1245',
|
||||
'title': 'Что слушают Enter Shikari',
|
||||
'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
|
||||
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
'playlist_count': 5,
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}, {
|
||||
# playlist exceeding the limit of 150 tracks shipped with webpage (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/6666)
|
||||
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# playlist exceeding the limit of 150 tracks (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/6666)
|
||||
'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
|
||||
'info_dict': {
|
||||
'id': '1036',
|
||||
'title': 'Музыка 90-х',
|
||||
'id': '1364',
|
||||
'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
|
||||
},
|
||||
'playlist_mincount': 300,
|
||||
'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
'playlist_mincount': 437,
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -259,16 +327,8 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||
user = mobj.group('user')
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
playlist = self._download_json(
|
||||
'https://music.yandex.%s/handlers/playlist.jsx' % tld,
|
||||
playlist_id, 'Downloading missing tracks JSON',
|
||||
fatal=False,
|
||||
headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
'X-Retpath-Y': url,
|
||||
},
|
||||
query={
|
||||
playlist = self._call_api(
|
||||
'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
|
||||
'owner': user,
|
||||
'kinds': playlist_id,
|
||||
'light': 'true',
|
||||
@@ -277,37 +337,103 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
|
||||
'overembed': 'false',
|
||||
})['playlist']
|
||||
|
||||
tracks = playlist['tracks']
|
||||
track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
|
||||
|
||||
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
|
||||
# missing tracks should be retrieved manually.
|
||||
if len(tracks) < len(track_ids):
|
||||
present_track_ids = set([
|
||||
compat_str(track['id'])
|
||||
for track in tracks if track.get('id')])
|
||||
missing_track_ids = [
|
||||
track_id for track_id in track_ids
|
||||
if track_id not in present_track_ids]
|
||||
missing_tracks = self._download_json(
|
||||
'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
|
||||
playlist_id, 'Downloading missing tracks JSON',
|
||||
fatal=False,
|
||||
headers={
|
||||
'Referer': url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
},
|
||||
query={
|
||||
'entries': ','.join(missing_track_ids),
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
'strict': 'true',
|
||||
})
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
tracks = self._extract_tracks(playlist, playlist_id, url, tld)
|
||||
|
||||
return self.playlist_result(
|
||||
self._build_playlist(tracks),
|
||||
compat_str(playlist_id),
|
||||
playlist.get('title'), playlist.get('description'))
|
||||
|
||||
|
||||
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
|
||||
def _call_artist(self, tld, url, artist_id):
|
||||
return self._call_api(
|
||||
'artist', tld, url, artist_id,
|
||||
'Downloading artist %s JSON' % self._ARTIST_WHAT, {
|
||||
'artist': artist_id,
|
||||
'what': self._ARTIST_WHAT,
|
||||
'sort': self._ARTIST_SORT or '',
|
||||
'dir': '',
|
||||
'period': '',
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
})
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
tld = mobj.group('tld')
|
||||
artist_id = mobj.group('id')
|
||||
data = self._call_artist(tld, url, artist_id)
|
||||
tracks = self._extract_tracks(data, artist_id, url, tld)
|
||||
title = try_get(data, lambda x: x['artist']['name'], compat_str)
|
||||
return self.playlist_result(
|
||||
self._build_playlist(tracks), artist_id, title)
|
||||
|
||||
|
||||
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
|
||||
IE_NAME = 'yandexmusic:artist:tracks'
|
||||
IE_DESC = 'Яндекс.Музыка - Артист - Треки'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/tracks'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://music.yandex.ru/artist/617526/tracks',
|
||||
'info_dict': {
|
||||
'id': '617526',
|
||||
'title': 'md5:131aef29d45fd5a965ca613e708c040b',
|
||||
},
|
||||
'playlist_count': 507,
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
_ARTIST_SORT = ''
|
||||
_ARTIST_WHAT = 'tracks'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
tld = mobj.group('tld')
|
||||
artist_id = mobj.group('id')
|
||||
data = self._call_artist(tld, url, artist_id)
|
||||
tracks = self._extract_tracks(data, artist_id, url, tld)
|
||||
artist = try_get(data, lambda x: x['artist']['name'], compat_str)
|
||||
title = '%s - %s' % (artist or artist_id, 'Треки')
|
||||
return self.playlist_result(
|
||||
self._build_playlist(tracks), artist_id, title)
|
||||
|
||||
|
||||
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
|
||||
IE_NAME = 'yandexmusic:artist:albums'
|
||||
IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
|
||||
_VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/albums'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://music.yandex.ru/artist/617526/albums',
|
||||
'info_dict': {
|
||||
'id': '617526',
|
||||
'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
# 'skip': 'Travis CI servers blocked by YandexMusic',
|
||||
}]
|
||||
|
||||
_ARTIST_SORT = 'year'
|
||||
_ARTIST_WHAT = 'albums'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
tld = mobj.group('tld')
|
||||
artist_id = mobj.group('id')
|
||||
data = self._call_artist(tld, url, artist_id)
|
||||
entries = []
|
||||
for album in data['albums']:
|
||||
if not isinstance(album, dict):
|
||||
continue
|
||||
album_id = album.get('id')
|
||||
if not album_id:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://music.yandex.ru/album/%s' % album_id,
|
||||
ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
|
||||
artist = try_get(data, lambda x: x['artist']['name'], compat_str)
|
||||
title = '%s - %s' % (artist or artist_id, 'Альбомы')
|
||||
return self.playlist_result(entries, artist_id, title)
|
||||
|
@@ -67,11 +67,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
|
||||
|
||||
_YOUTUBE_CLIENT_HEADERS = {
|
||||
'x-youtube-client-name': '1',
|
||||
'x-youtube-client-version': '1.20200609.04.02',
|
||||
}
|
||||
|
||||
def _set_language(self):
|
||||
self._set_cookie(
|
||||
'.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
|
||||
@@ -283,6 +278,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
}
|
||||
|
||||
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
|
||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||
|
||||
def _call_api(self, ep, query, video_id):
|
||||
data = self._DEFAULT_API_DATA.copy()
|
||||
@@ -323,10 +319,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(?:www\.)?invidious\.kabi\.tk/|
|
||||
(?:www\.)?invidious\.13ad\.de/|
|
||||
(?:www\.)?invidious\.mastodon\.host/|
|
||||
(?:www\.)?invidious\.zapashcanon\.fr/|
|
||||
(?:www\.)?invidious\.kavin\.rocks/|
|
||||
(?:www\.)?invidious\.tube/|
|
||||
(?:www\.)?invidiou\.site/|
|
||||
(?:www\.)?invidious\.site/|
|
||||
(?:www\.)?invidious\.xyz/|
|
||||
(?:www\.)?invidious\.nixnet\.xyz/|
|
||||
(?:www\.)?invidious\.drycat\.fr/|
|
||||
(?:www\.)?tube\.poal\.co/|
|
||||
(?:www\.)?tube\.connect\.cafe/|
|
||||
(?:www\.)?vid\.wxzm\.sx/|
|
||||
(?:www\.)?vid\.mint\.lgbt/|
|
||||
(?:www\.)?yewtu\.be/|
|
||||
(?:www\.)?yt\.elukerio\.org/|
|
||||
(?:www\.)?yt\.lelux\.fi/|
|
||||
@@ -601,7 +605,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
|
||||
}
|
||||
},
|
||||
# Normal age-gate video (No vevo, embed allowed)
|
||||
# Normal age-gate video (No vevo, embed allowed), available via embed page
|
||||
{
|
||||
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
|
||||
'info_dict': {
|
||||
@@ -617,6 +621,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Age-gated video only available with authentication (unavailable
|
||||
# via embed page workaround)
|
||||
'url': 'XgnwCQzjau8',
|
||||
'only_matching': True,
|
||||
},
|
||||
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
|
||||
# YouTube Red ad is not captured for creator
|
||||
{
|
||||
@@ -1068,7 +1078,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
},
|
||||
},
|
||||
{
|
||||
# with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
|
||||
# with '};' inside yt initial data (see [1])
|
||||
# see [2] for an example with '};' inside ytInitialPlayerResponse
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27093
|
||||
# 2. https://github.com/ytdl-org/youtube-dl/issues/27216
|
||||
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
|
||||
'info_dict': {
|
||||
'id': 'CHqg6qOn4no',
|
||||
@@ -1633,8 +1646,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
|
||||
or re.search(r'player-age-gate-content">', video_webpage) is not None):
|
||||
|
||||
if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
# this can be viewed without login into Youtube
|
||||
@@ -1686,7 +1699,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not video_info and not player_response:
|
||||
player_response = extract_player_response(
|
||||
self._search_regex(
|
||||
r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
|
||||
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
|
||||
'initial player response', default='{}'),
|
||||
video_id)
|
||||
|
||||
@@ -2682,6 +2696,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
# no longer available?
|
||||
'url': 'https://www.youtube.com/feed/recommended',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# inline playlist with not always working continuations
|
||||
'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
|
||||
'only_matching': True,
|
||||
}
|
||||
# TODO
|
||||
# {
|
||||
@@ -2785,12 +2803,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
def _shelf_entries(self, shelf_renderer):
|
||||
def _shelf_entries(self, shelf_renderer, skip_channels=False):
|
||||
ep = try_get(
|
||||
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
||||
compat_str)
|
||||
shelf_url = urljoin('https://www.youtube.com', ep)
|
||||
if shelf_url:
|
||||
# Skipping links to another channels, note that checking for
|
||||
# endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
|
||||
# will not work
|
||||
if skip_channels and '/channels?' in shelf_url:
|
||||
return
|
||||
title = try_get(
|
||||
shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
|
||||
yield self.url_result(shelf_url, video_title=title)
|
||||
@@ -2855,6 +2878,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
for entry in self._post_thread_entries(renderer):
|
||||
yield entry
|
||||
|
||||
@staticmethod
|
||||
def _build_continuation_query(continuation, ctp=None):
|
||||
query = {
|
||||
'ctoken': continuation,
|
||||
'continuation': continuation,
|
||||
}
|
||||
if ctp:
|
||||
query['itct'] = ctp
|
||||
return query
|
||||
|
||||
@staticmethod
|
||||
def _extract_next_continuation_data(renderer):
|
||||
next_continuation = try_get(
|
||||
@@ -2865,11 +2898,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
if not continuation:
|
||||
return
|
||||
ctp = next_continuation.get('clickTrackingParams')
|
||||
return {
|
||||
'ctoken': continuation,
|
||||
'continuation': continuation,
|
||||
'itct': ctp,
|
||||
}
|
||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
||||
|
||||
@classmethod
|
||||
def _extract_continuation(cls, renderer):
|
||||
@@ -2892,18 +2921,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
if not continuation:
|
||||
continue
|
||||
ctp = continuation_ep.get('clickTrackingParams')
|
||||
if not ctp:
|
||||
continue
|
||||
return {
|
||||
'ctoken': continuation,
|
||||
'continuation': continuation,
|
||||
'itct': ctp,
|
||||
}
|
||||
return YoutubeTabIE._build_continuation_query(continuation, ctp)
|
||||
|
||||
def _entries(self, tab, identity_token):
|
||||
slr_renderer = try_get(tab, lambda x: x['sectionListRenderer'], dict)
|
||||
tab_content = try_get(tab, lambda x: x['content'], dict)
|
||||
if not tab_content:
|
||||
return
|
||||
slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
|
||||
if not slr_renderer:
|
||||
return
|
||||
is_channels_tab = tab.get('title') == 'Channels'
|
||||
continuation = None
|
||||
slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
|
||||
for slr_content in slr_contents:
|
||||
@@ -2930,7 +2957,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
continue
|
||||
renderer = isr_content.get('shelfRenderer')
|
||||
if renderer:
|
||||
for entry in self._shelf_entries(renderer):
|
||||
for entry in self._shelf_entries(renderer, not is_channels_tab):
|
||||
yield entry
|
||||
continue
|
||||
renderer = isr_content.get('backstagePostThreadRenderer')
|
||||
@@ -3040,6 +3067,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
|
||||
return uploader
|
||||
|
||||
@staticmethod
|
||||
def _extract_alert(data):
|
||||
alerts = []
|
||||
for alert in try_get(data, lambda x: x['alerts'], list) or []:
|
||||
if not isinstance(alert, dict):
|
||||
continue
|
||||
alert_text = try_get(
|
||||
alert, lambda x: x['alertRenderer']['text'], dict)
|
||||
if not alert_text:
|
||||
continue
|
||||
text = try_get(
|
||||
alert_text,
|
||||
(lambda x: x['simpleText'], lambda x: x['runs'][0]['text']),
|
||||
compat_str)
|
||||
if text:
|
||||
alerts.append(text)
|
||||
return '\n'.join(alerts)
|
||||
|
||||
def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
|
||||
selected_tab = self._extract_selected_tab(tabs)
|
||||
renderer = try_get(
|
||||
@@ -3060,20 +3105,43 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
description = None
|
||||
playlist_id = item_id
|
||||
playlist = self.playlist_result(
|
||||
self._entries(selected_tab['content'], identity_token),
|
||||
self._entries(selected_tab, identity_token),
|
||||
playlist_id=playlist_id, playlist_title=title,
|
||||
playlist_description=description)
|
||||
playlist.update(self._extract_uploader(data))
|
||||
return playlist
|
||||
|
||||
def _extract_from_playlist(self, item_id, data, playlist):
|
||||
def _extract_from_playlist(self, item_id, url, data, playlist):
|
||||
title = playlist.get('title') or try_get(
|
||||
data, lambda x: x['titleText']['simpleText'], compat_str)
|
||||
playlist_id = playlist.get('playlistId') or item_id
|
||||
# Inline playlist rendition continuation does not always work
|
||||
# at Youtube side, so delegating regular tab-based playlist URL
|
||||
# processing whenever possible.
|
||||
playlist_url = urljoin(url, try_get(
|
||||
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
||||
compat_str))
|
||||
if playlist_url and playlist_url != url:
|
||||
return self.url_result(
|
||||
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
||||
video_title=title)
|
||||
return self.playlist_result(
|
||||
self._playlist_entries(playlist), playlist_id=playlist_id,
|
||||
playlist_title=title)
|
||||
|
||||
def _extract_identity_token(self, webpage, item_id):
|
||||
ytcfg = self._parse_json(
|
||||
self._search_regex(
|
||||
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
||||
default='{}'), item_id, fatal=False)
|
||||
if ytcfg:
|
||||
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
|
||||
if token:
|
||||
return token
|
||||
return self._search_regex(
|
||||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
'identity token', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
url = compat_urlparse.urlunparse(
|
||||
@@ -3088,9 +3156,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
|
||||
webpage = self._download_webpage(url, item_id)
|
||||
identity_token = self._search_regex(
|
||||
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
'identity token', default=None)
|
||||
identity_token = self._extract_identity_token(webpage, item_id)
|
||||
data = self._extract_yt_initial_data(item_id, webpage)
|
||||
tabs = try_get(
|
||||
data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
|
||||
@@ -3099,7 +3165,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
playlist = try_get(
|
||||
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
|
||||
if playlist:
|
||||
return self._extract_from_playlist(item_id, data, playlist)
|
||||
return self._extract_from_playlist(item_id, url, data, playlist)
|
||||
# Fallback to video extraction if no playlist alike page is recognized.
|
||||
# First check for the current video then try the v attribute of URL query.
|
||||
video_id = try_get(
|
||||
@@ -3107,6 +3173,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
||||
compat_str) or video_id
|
||||
if video_id:
|
||||
return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
|
||||
# Capture and output alerts
|
||||
alert = self._extract_alert(data)
|
||||
if alert:
|
||||
raise ExtractorError(alert, expected=True)
|
||||
# Failed to recognize
|
||||
raise ExtractorError('Unable to recognize tab page')
|
||||
|
||||
@@ -3119,8 +3189,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
(?:
|
||||
(?:
|
||||
youtube(?:kids)?\.com|
|
||||
invidio\.us|
|
||||
youtu\.be
|
||||
invidio\.us
|
||||
)
|
||||
/.*?\?.*?\blist=
|
||||
)?
|
||||
@@ -3165,6 +3234,32 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
|
||||
}
|
||||
}, {
|
||||
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# music album playlist
|
||||
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if YoutubeTabIE.suitable(url) else super(
|
||||
YoutubePlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
if not qs:
|
||||
qs = {'list': playlist_id}
|
||||
return self.url_result(
|
||||
update_url_query('https://www.youtube.com/playlist', qs),
|
||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
|
||||
|
||||
|
||||
class YoutubeYtBeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||
_TESTS = [{
|
||||
'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
|
||||
'info_dict': {
|
||||
'id': 'yeWKywCrFtk',
|
||||
@@ -3187,28 +3282,18 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# music album playlist
|
||||
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if YoutubeTabIE.suitable(url) else super(
|
||||
YoutubePlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
if not qs:
|
||||
qs = {'list': playlist_id}
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
playlist_id = mobj.group('playlist_id')
|
||||
return self.url_result(
|
||||
update_url_query('https://www.youtube.com/playlist', qs),
|
||||
ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
|
||||
update_url_query('https://www.youtube.com/watch', {
|
||||
'v': video_id,
|
||||
'list': playlist_id,
|
||||
'feature': 'youtu.be',
|
||||
}), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
|
||||
|
||||
|
||||
class YoutubeYtUserIE(InfoExtractor):
|
||||
|
@@ -40,7 +40,7 @@ class ZDFBaseIE(InfoExtractor):
|
||||
|
||||
class ZDFIE(ZDFBaseIE):
|
||||
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
|
||||
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
|
||||
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
_TESTS = [{
|
||||
@@ -119,7 +119,7 @@ class ZDFIE(ZDFBaseIE):
|
||||
if not ptmd_path:
|
||||
ptmd_path = t[
|
||||
'http://zdf.de/rels/streams/ptmd-template'].replace(
|
||||
'{playerId}', 'portal')
|
||||
'{playerId}', 'ngplayer_2_4')
|
||||
|
||||
ptmd = self._call_api(
|
||||
urljoin(url, ptmd_path), player, url, video_id, 'metadata')
|
||||
|
@@ -369,7 +369,7 @@ def parseOpts(overrideArguments=None):
|
||||
authentication.add_option(
|
||||
'--video-password',
|
||||
dest='videopassword', metavar='PASSWORD',
|
||||
help='Video password (vimeo, smotri, youku)')
|
||||
help='Video password (vimeo, youku)')
|
||||
|
||||
adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
|
||||
adobe_pass.add_option(
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2020.11.24'
|
||||
__version__ = '2020.12.14'
|
||||
|
Reference in New Issue
Block a user