mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-01-24 20:30:13 +09:00
Merge branch 'master' of https://github.com/ytdl-org/youtube-dl into mkvthumbnail
This commit is contained in:
commit
aa9a04bab9
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support
|
- [ ] I'm reporting a broken site support
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
- [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||||
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2020.09.20
|
[debug] youtube-dl version 2020.11.21.1
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs must not violate any copyrights.
|
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs must not violate any copyrights.
|
||||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a new site support request
|
- [ ] I'm reporting a new site support request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
- [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||||
|
@ -18,13 +18,13 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a site feature request
|
- [ ] I'm reporting a site feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
- [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
|
||||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support issue
|
- [ ] I'm reporting a broken site support issue
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
- [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||||
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2020.09.20
|
[debug] youtube-dl version 2020.11.21.1
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@ -19,13 +19,13 @@ labels: 'request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a feature request
|
- [ ] I'm reporting a feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.09.20**
|
- [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
|
||||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
149
ChangeLog
149
ChangeLog
@ -1,3 +1,118 @@
|
|||||||
|
version 2020.11.21.1
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [downloader/http] Fix crash during urlopen caused by missing reason
|
||||||
|
of URLError
|
||||||
|
* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries
|
||||||
|
of url_transparent (#27064)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [svtplay] Add support for svt.se/barnkanalen (#24817)
|
||||||
|
+ [svt] Extract timestamp (#27130)
|
||||||
|
* [svtplay] Improve thumbnail extraction (#27130)
|
||||||
|
* [youtube] Fix error reason extraction (#27081)
|
||||||
|
* [youtube] Fix like and dislike count extraction (#25977)
|
||||||
|
+ [youtube:tab] Add support for current video and fix lives extraction (#27126)
|
||||||
|
* [infoq] Fix format extraction (#25984)
|
||||||
|
* [francetv] Update to fix thumbnail URL issue (#27120)
|
||||||
|
* [youtube] Improve yt initial data extraction (#27093)
|
||||||
|
+ [discoverynetworks] Add support for new TLC/DMAX URLs (#27100)
|
||||||
|
* [rai] Fix protocol relative relinker URLs (#22766)
|
||||||
|
* [rai] Fix unavailable video format detection
|
||||||
|
* [rai] Improve extraction
|
||||||
|
* [rai] Fix extraction (#27077)
|
||||||
|
* [viki] Improve format extraction
|
||||||
|
* [viki] Fix stream extraction from MPD (#27092)
|
||||||
|
* [googledrive] Fix format extraction (#26979)
|
||||||
|
+ [amara] Add support for amara.org (#20618)
|
||||||
|
* [vimeo:album] Fix extraction (#27079)
|
||||||
|
* [mtv] Fix mgid extraction (#26841)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.19
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400,
|
||||||
|
#24151, #25617, #25618, #25586, #26068, #27072)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youporn] Fix upload date extraction
|
||||||
|
* [youporn] Make comment count optional (#26986)
|
||||||
|
* [arte] Rework extractors
|
||||||
|
* Reimplement embed and playlist extractors to delegate to the single
|
||||||
|
entrypoint artetv extractor
|
||||||
|
* Improve embeds detection (#27057)
|
||||||
|
+ [arte] Extract m3u8 formats (#27061)
|
||||||
|
* [mgtv] Fix format extraction (#26415)
|
||||||
|
+ [lbry] Add support for odysee.com (#26806)
|
||||||
|
* [francetv] Improve info extraction
|
||||||
|
+ [francetv] Add fallback video URL extraction (#27047)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [spiegel] Fix extraction (#24206, #24767)
|
||||||
|
* [youtube] Improve extraction
|
||||||
|
+ Add support for --no-playlist (#27009)
|
||||||
|
* Improve playlist and mix extraction (#26390, #26509, #26534, #27011)
|
||||||
|
+ Extract playlist uploader data
|
||||||
|
* [youtube:tab] Fix view count extraction (#27051)
|
||||||
|
* [malltv] Fix extraction (#27035)
|
||||||
|
+ [bandcamp] Extract playlist description (#22684)
|
||||||
|
* [urplay] Fix extraction (#26828)
|
||||||
|
* [youtube:tab] Fix playlist title extraction (#27015)
|
||||||
|
* [youtube] Fix chapters extraction (#26005)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.17
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Skip ! prefixed code in js_to_json
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube:tab] Fix extraction with cookies provided (#27005)
|
||||||
|
* [lrt] Fix extraction with empty tags (#20264)
|
||||||
|
+ [ndr:embed:base] Extract subtitles (#25447, #26106)
|
||||||
|
+ [servus] Add support for pm-wissen.com (#25869)
|
||||||
|
* [servus] Fix extraction (#26872, #26967, #26983, #27000)
|
||||||
|
* [xtube] Fix extraction (#26996)
|
||||||
|
* [lrt] Fix extraction
|
||||||
|
+ [lbry] Add support for lbry.tv
|
||||||
|
+ [condenast] Extract subtitles
|
||||||
|
* [condenast] Fix extraction
|
||||||
|
* [bandcamp] Fix extraction (#26681, #26684)
|
||||||
|
* [rai] Fix RaiPlay extraction (#26064, #26096)
|
||||||
|
* [vlive] Fix extraction
|
||||||
|
* [usanetwork] Fix extraction
|
||||||
|
* [nbc] Fix NBCNews/Today/MSNBC extraction
|
||||||
|
* [cnbc] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.12
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Rework extractors
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.11.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
|
||||||
|
* [downloader/http] Properly handle missing message in SSLError (#26646)
|
||||||
|
* [downloader/http] Fix access to not yet opened stream in retry
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix JS player URL extraction
|
||||||
|
* [ytsearch] Fix extraction (#26920)
|
||||||
|
* [afreecatv] Fix typo (#26970)
|
||||||
|
* [23video] Relax URL regular expression (#26870)
|
||||||
|
+ [ustream] Add support for video.ibm.com (#26894)
|
||||||
|
* [iqiyi] Fix typo (#26884)
|
||||||
|
+ [expressen] Add support for di.se (#26670)
|
||||||
|
* [iprima] Improve video id extraction (#26507, #26494)
|
||||||
|
|
||||||
|
|
||||||
version 2020.09.20
|
version 2020.09.20
|
||||||
|
|
||||||
Core
|
Core
|
||||||
@ -283,7 +398,7 @@ Extractors
|
|||||||
+ Add support for more domains
|
+ Add support for more domains
|
||||||
* [svt] Fix series extraction (#22297)
|
* [svt] Fix series extraction (#22297)
|
||||||
* [svt] Fix article extraction (#22897, #22919)
|
* [svt] Fix article extraction (#22897, #22919)
|
||||||
* [soundcloud] Imporve private playlist/set tracks extraction (#3707)
|
* [soundcloud] Improve private playlist/set tracks extraction (#3707)
|
||||||
|
|
||||||
|
|
||||||
version 2020.01.24
|
version 2020.01.24
|
||||||
@ -409,7 +524,7 @@ Extractors
|
|||||||
* [abcotvs] Relax URL regular expression and improve metadata extraction
|
* [abcotvs] Relax URL regular expression and improve metadata extraction
|
||||||
(#18014)
|
(#18014)
|
||||||
* [channel9] Reduce response size
|
* [channel9] Reduce response size
|
||||||
* [adobetv] Improve extaction
|
* [adobetv] Improve extraction
|
||||||
* Use OnDemandPagedList for list extractors
|
* Use OnDemandPagedList for list extractors
|
||||||
* Reduce show extraction requests
|
* Reduce show extraction requests
|
||||||
* Extract original video format and subtitles
|
* Extract original video format and subtitles
|
||||||
@ -434,7 +549,7 @@ Extractors
|
|||||||
* [dailymotion] Improve extraction
|
* [dailymotion] Improve extraction
|
||||||
* Extract http formats included in m3u8 manifest
|
* Extract http formats included in m3u8 manifest
|
||||||
* Fix user extraction (#3553, #21415)
|
* Fix user extraction (#3553, #21415)
|
||||||
+ Add suport for User Authentication (#11491)
|
+ Add support for User Authentication (#11491)
|
||||||
* Fix password protected videos extraction (#23176)
|
* Fix password protected videos extraction (#23176)
|
||||||
* Respect age limit option and family filter cookie value (#18437)
|
* Respect age limit option and family filter cookie value (#18437)
|
||||||
* Handle video url playlist query param
|
* Handle video url playlist query param
|
||||||
@ -519,7 +634,7 @@ Extractors
|
|||||||
- [go90] Remove extractor
|
- [go90] Remove extractor
|
||||||
* [kakao] Remove raw request
|
* [kakao] Remove raw request
|
||||||
+ [kakao] Extract format total bitrate
|
+ [kakao] Extract format total bitrate
|
||||||
* [daum] Fix VOD and Clip extracton (#15015)
|
* [daum] Fix VOD and Clip extraction (#15015)
|
||||||
* [kakao] Improve extraction
|
* [kakao] Improve extraction
|
||||||
+ Add support for embed URLs
|
+ Add support for embed URLs
|
||||||
+ Add support for Kakao Legacy vid based embed URLs
|
+ Add support for Kakao Legacy vid based embed URLs
|
||||||
@ -563,7 +678,7 @@ Extractors
|
|||||||
* Improve format extraction (#22123)
|
* Improve format extraction (#22123)
|
||||||
+ Extract uploader_id and uploader_url (#21916)
|
+ Extract uploader_id and uploader_url (#21916)
|
||||||
+ Extract all known thumbnails (#19071, #20659)
|
+ Extract all known thumbnails (#19071, #20659)
|
||||||
* Fix extration for private playlists (#20976)
|
* Fix extraction for private playlists (#20976)
|
||||||
+ Add support for playlist embeds (#20976)
|
+ Add support for playlist embeds (#20976)
|
||||||
* Skip preview formats (#22806)
|
* Skip preview formats (#22806)
|
||||||
* [dplay] Improve extraction
|
* [dplay] Improve extraction
|
||||||
@ -1038,7 +1153,7 @@ Extractors
|
|||||||
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
|
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
|
||||||
* [youtube] Extract srv[1-3] subtitle formats (#20566)
|
* [youtube] Extract srv[1-3] subtitle formats (#20566)
|
||||||
* [adultswim] Fix extraction (#18025)
|
* [adultswim] Fix extraction (#18025)
|
||||||
* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339)
|
* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
|
||||||
* [adn] Fix subtitle compatibility with ffmpeg
|
* [adn] Fix subtitle compatibility with ffmpeg
|
||||||
* [adn] Fix extraction and add support for positioning styles (#20549)
|
* [adn] Fix extraction and add support for positioning styles (#20549)
|
||||||
* [vk] Use unique video id (#17848)
|
* [vk] Use unique video id (#17848)
|
||||||
@ -1450,7 +1565,7 @@ version 2018.11.18
|
|||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
+ [wwe] Extract subtitles
|
+ [wwe] Extract subtitles
|
||||||
+ [wwe] Add support for playlistst (#14781)
|
+ [wwe] Add support for playlists (#14781)
|
||||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||||
* [vk] Detect geo restriction (#17767)
|
* [vk] Detect geo restriction (#17767)
|
||||||
* [openload] Use original host during extraction (#18211)
|
* [openload] Use original host during extraction (#18211)
|
||||||
@ -2483,7 +2598,7 @@ Extractors
|
|||||||
* [youku] Update ccode (#14872)
|
* [youku] Update ccode (#14872)
|
||||||
* [mnet] Fix format extraction (#14883)
|
* [mnet] Fix format extraction (#14883)
|
||||||
+ [xiami] Add Referer header to API request
|
+ [xiami] Add Referer header to API request
|
||||||
* [mtv] Correct scc extention in extracted subtitles (#13730)
|
* [mtv] Correct scc extension in extracted subtitles (#13730)
|
||||||
* [vvvvid] Fix extraction for kenc videos (#13406)
|
* [vvvvid] Fix extraction for kenc videos (#13406)
|
||||||
+ [br] Add support for BR Mediathek videos (#14560, #14788)
|
+ [br] Add support for BR Mediathek videos (#14560, #14788)
|
||||||
+ [daisuki] Add support for motto.daisuki.com (#14681)
|
+ [daisuki] Add support for motto.daisuki.com (#14681)
|
||||||
@ -2504,7 +2619,7 @@ Extractors
|
|||||||
* [nexx] Extract more formats
|
* [nexx] Extract more formats
|
||||||
+ [openload] Add support for openload.link (#14763)
|
+ [openload] Add support for openload.link (#14763)
|
||||||
* [empflix] Relax URL regular expression
|
* [empflix] Relax URL regular expression
|
||||||
* [empflix] Fix extractrion
|
* [empflix] Fix extraction
|
||||||
* [tnaflix] Don't modify download URLs (#14811)
|
* [tnaflix] Don't modify download URLs (#14811)
|
||||||
- [gamersyde] Remove extractor
|
- [gamersyde] Remove extractor
|
||||||
* [francetv:generationwhat] Fix extraction
|
* [francetv:generationwhat] Fix extraction
|
||||||
@ -2699,7 +2814,7 @@ Extractors
|
|||||||
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
||||||
* [yahoo] Use extracted brightcove account id (#14210)
|
* [yahoo] Use extracted brightcove account id (#14210)
|
||||||
* [rtve:alacarta] Fix extraction (#14290)
|
* [rtve:alacarta] Fix extraction (#14290)
|
||||||
+ [yahoo] Add support for custom brigthcove embeds (#14210)
|
+ [yahoo] Add support for custom brightcove embeds (#14210)
|
||||||
+ [generic] Add support for Video.js embeds
|
+ [generic] Add support for Video.js embeds
|
||||||
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
||||||
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
||||||
@ -2944,7 +3059,7 @@ Extractors
|
|||||||
* [amcnetworks] Make rating optional (#12453)
|
* [amcnetworks] Make rating optional (#12453)
|
||||||
* [cloudy] Fix extraction (#13737)
|
* [cloudy] Fix extraction (#13737)
|
||||||
+ [nickru] Add support for nickelodeon.ru
|
+ [nickru] Add support for nickelodeon.ru
|
||||||
* [mtv] Improve thumbnal extraction
|
* [mtv] Improve thumbnail extraction
|
||||||
* [nick] Automate geo-restriction bypass (#13711)
|
* [nick] Automate geo-restriction bypass (#13711)
|
||||||
* [niconico] Improve error reporting (#13696)
|
* [niconico] Improve error reporting (#13696)
|
||||||
|
|
||||||
@ -3308,7 +3423,7 @@ Extractors
|
|||||||
+ [cda] Support birthday verification (#12789)
|
+ [cda] Support birthday verification (#12789)
|
||||||
* [leeco] Fix extraction (#12974)
|
* [leeco] Fix extraction (#12974)
|
||||||
+ [pbs] Extract chapters
|
+ [pbs] Extract chapters
|
||||||
* [amp] Imporove thumbnail and subtitles extraction
|
* [amp] Improve thumbnail and subtitles extraction
|
||||||
* [foxsports] Fix extraction (#12945)
|
* [foxsports] Fix extraction (#12945)
|
||||||
- [coub] Remove comment count extraction (#12941)
|
- [coub] Remove comment count extraction (#12941)
|
||||||
|
|
||||||
@ -3478,7 +3593,7 @@ Extractors
|
|||||||
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
||||||
+ [npo:live] Add support for default URL (#12555)
|
+ [npo:live] Add support for default URL (#12555)
|
||||||
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
||||||
+ [thesun] Add suport for thesun.co.uk (#11298, #12674)
|
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
|
||||||
+ [ceskateleveize:porady] Add support for porady (#7411, #12645)
|
+ [ceskateleveize:porady] Add support for porady (#7411, #12645)
|
||||||
* [ceskateleveize] Improve extraction and remove URL replacement hacks
|
* [ceskateleveize] Improve extraction and remove URL replacement hacks
|
||||||
+ [kaltura] Add support for iframe embeds (#12679)
|
+ [kaltura] Add support for iframe embeds (#12679)
|
||||||
@ -3517,7 +3632,7 @@ Extractors
|
|||||||
* [funimation] Fix extraction (#10696, #11773)
|
* [funimation] Fix extraction (#10696, #11773)
|
||||||
+ [xfileshare] Add support for vidabc.com (#12589)
|
+ [xfileshare] Add support for vidabc.com (#12589)
|
||||||
+ [xfileshare] Improve extraction and extract hls formats
|
+ [xfileshare] Improve extraction and extract hls formats
|
||||||
+ [crunchyroll] Pass geo verifcation proxy
|
+ [crunchyroll] Pass geo verification proxy
|
||||||
+ [cwtv] Extract ISM formats
|
+ [cwtv] Extract ISM formats
|
||||||
+ [tvplay] Bypass geo restriction
|
+ [tvplay] Bypass geo restriction
|
||||||
+ [vrv] Add support for vrv.co
|
+ [vrv] Add support for vrv.co
|
||||||
@ -3581,7 +3696,7 @@ Extractors
|
|||||||
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
||||||
+ [toongoggles] Add support for toongoggles.com (#12171)
|
+ [toongoggles] Add support for toongoggles.com (#12171)
|
||||||
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
||||||
+ [discoverynetworks] Add support for more domains and bypass geo restiction
|
+ [discoverynetworks] Add support for more domains and bypass geo restriction
|
||||||
* [openload] Fix extraction (#10408)
|
* [openload] Fix extraction (#10408)
|
||||||
|
|
||||||
|
|
||||||
@ -5171,7 +5286,7 @@ version 2016.07.09.1
|
|||||||
Fixed/improved extractors
|
Fixed/improved extractors
|
||||||
- youtube
|
- youtube
|
||||||
- ard
|
- ard
|
||||||
- srmediatek (#9373)
|
- srmediathek (#9373)
|
||||||
|
|
||||||
|
|
||||||
version 2016.07.09
|
version 2016.07.09
|
||||||
@ -5235,7 +5350,7 @@ Fixed/improved extractors
|
|||||||
- kaltura (#5557)
|
- kaltura (#5557)
|
||||||
- la7
|
- la7
|
||||||
- Changed features
|
- Changed features
|
||||||
- Rename --cn-verfication-proxy to --geo-verification-proxy
|
- Rename --cn-verification-proxy to --geo-verification-proxy
|
||||||
Miscellaneous
|
Miscellaneous
|
||||||
- Add script for displaying downloads statistics
|
- Add script for displaying downloads statistics
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
|
|||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
# find the correct sorting and add the required base classes so that sublcasses
|
# find the correct sorting and add the required base classes so that subclasses
|
||||||
# can be correctly created
|
# can be correctly created
|
||||||
classes = _ALL_CLASSES[:-1]
|
classes = _ALL_CLASSES[:-1]
|
||||||
ordered_cls = []
|
ordered_cls = []
|
||||||
|
@ -41,6 +41,7 @@
|
|||||||
- **AlJazeera**
|
- **AlJazeera**
|
||||||
- **Allocine**
|
- **Allocine**
|
||||||
- **AlphaPorno**
|
- **AlphaPorno**
|
||||||
|
- **Amara**
|
||||||
- **AMCNetworks**
|
- **AMCNetworks**
|
||||||
- **AmericasTestKitchen**
|
- **AmericasTestKitchen**
|
||||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||||
@ -58,9 +59,9 @@
|
|||||||
- **ARD:mediathek**
|
- **ARD:mediathek**
|
||||||
- **ARDBetaMediathek**
|
- **ARDBetaMediathek**
|
||||||
- **Arkena**
|
- **Arkena**
|
||||||
- **arte.tv:+7**
|
- **ArteTV**
|
||||||
- **arte.tv:embed**
|
- **ArteTVEmbed**
|
||||||
- **arte.tv:playlist**
|
- **ArteTVPlaylist**
|
||||||
- **AsianCrush**
|
- **AsianCrush**
|
||||||
- **AsianCrushPlaylist**
|
- **AsianCrushPlaylist**
|
||||||
- **AtresPlayer**
|
- **AtresPlayer**
|
||||||
@ -417,6 +418,7 @@
|
|||||||
- **la7.it**
|
- **la7.it**
|
||||||
- **laola1tv**
|
- **laola1tv**
|
||||||
- **laola1tv:embed**
|
- **laola1tv:embed**
|
||||||
|
- **lbry.tv**
|
||||||
- **LCI**
|
- **LCI**
|
||||||
- **Lcp**
|
- **Lcp**
|
||||||
- **LcpPlay**
|
- **LcpPlay**
|
||||||
@ -823,8 +825,6 @@
|
|||||||
- **SpankBangPlaylist**
|
- **SpankBangPlaylist**
|
||||||
- **Spankwire**
|
- **Spankwire**
|
||||||
- **Spiegel**
|
- **Spiegel**
|
||||||
- **Spiegel:Article**: Articles on spiegel.de
|
|
||||||
- **Spiegeltv**
|
|
||||||
- **sport.francetvinfo.fr**
|
- **sport.francetvinfo.fr**
|
||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBox**
|
- **SportBox**
|
||||||
@ -1042,7 +1042,6 @@
|
|||||||
- **vk:wallpost**
|
- **vk:wallpost**
|
||||||
- **vlive**
|
- **vlive**
|
||||||
- **vlive:channel**
|
- **vlive:channel**
|
||||||
- **vlive:playlist**
|
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
- **VODPl**
|
- **VODPl**
|
||||||
- **VODPlatform**
|
- **VODPlatform**
|
||||||
@ -1131,20 +1130,15 @@
|
|||||||
- **YourPorn**
|
- **YourPorn**
|
||||||
- **YourUpload**
|
- **YourUpload**
|
||||||
- **youtube**: YouTube.com
|
- **youtube**: YouTube.com
|
||||||
- **youtube:channel**: YouTube.com channels
|
|
||||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
|
||||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||||
- **youtube:live**: YouTube.com live streams
|
|
||||||
- **youtube:playlist**: YouTube.com playlists
|
- **youtube:playlist**: YouTube.com playlists
|
||||||
- **youtube:playlists**: YouTube.com user/channel playlists
|
|
||||||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||||
- **youtube:search**: YouTube.com searches
|
- **youtube:search**: YouTube.com searches
|
||||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||||
- **youtube:search_url**: YouTube.com search URLs
|
|
||||||
- **youtube:show**: YouTube.com (multi-season) shows
|
|
||||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
- **youtube:tab**: YouTube.com tab
|
||||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||||
|
- **YoutubeYtUser**
|
||||||
- **Zapiks**
|
- **Zapiks**
|
||||||
- **Zaq1**
|
- **Zaq1**
|
||||||
- **Zattoo**
|
- **Zattoo**
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
"writeinfojson": true,
|
"writeinfojson": true,
|
||||||
"writesubtitles": false,
|
"writesubtitles": false,
|
||||||
"allsubtitles": false,
|
"allsubtitles": false,
|
||||||
"listssubtitles": false,
|
"listsubtitles": false,
|
||||||
"socket_timeout": 20,
|
"socket_timeout": 20,
|
||||||
"fixup": "never"
|
"fixup": "never"
|
||||||
}
|
}
|
||||||
|
@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
self.assertEqual(downloaded['extractor'], 'testex')
|
self.assertEqual(downloaded['extractor'], 'testex')
|
||||||
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||||
|
|
||||||
|
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||||
|
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||||
|
|
||||||
|
class _YDL(YDL):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super(_YDL, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def trouble(self, s, tb=None):
|
||||||
|
pass
|
||||||
|
|
||||||
|
ydl = _YDL({
|
||||||
|
'format': 'extra',
|
||||||
|
'ignoreerrors': True,
|
||||||
|
})
|
||||||
|
|
||||||
|
class VideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'video:(?P<id>\d+)'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
formats = [{
|
||||||
|
'format_id': 'default',
|
||||||
|
'url': 'url:',
|
||||||
|
}]
|
||||||
|
if video_id == '0':
|
||||||
|
raise ExtractorError('foo')
|
||||||
|
if video_id == '2':
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'extra',
|
||||||
|
'url': TEST_URL,
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': 'Video %s' % video_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
class PlaylistIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'playlist:'
|
||||||
|
|
||||||
|
def _entries(self):
|
||||||
|
for n in range(3):
|
||||||
|
video_id = compat_str(n)
|
||||||
|
yield {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': VideoIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'url': 'video:%s' % video_id,
|
||||||
|
'title': 'Video Transparent %s' % video_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self.playlist_result(self._entries())
|
||||||
|
|
||||||
|
ydl.add_info_extractor(VideoIE(ydl))
|
||||||
|
ydl.add_info_extractor(PlaylistIE(ydl))
|
||||||
|
info = ydl.extract_info('playlist:')
|
||||||
|
entries = info['entries']
|
||||||
|
self.assertEqual(len(entries), 3)
|
||||||
|
self.assertTrue(entries[0] is None)
|
||||||
|
self.assertTrue(entries[1] is None)
|
||||||
|
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
|
||||||
|
downloaded = ydl.downloaded_info_dicts[0]
|
||||||
|
self.assertEqual(entries[2], downloaded)
|
||||||
|
self.assertEqual(downloaded['url'], TEST_URL)
|
||||||
|
self.assertEqual(downloaded['title'], 'Video Transparent 2')
|
||||||
|
self.assertEqual(downloaded['id'], '2')
|
||||||
|
self.assertEqual(downloaded['extractor'], 'Video')
|
||||||
|
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -31,16 +31,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
|
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||||
assertPlaylist('PL63F0C78739B09958')
|
assertPlaylist('PL63F0C78739B09958')
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||||
# Top tracks
|
# Top tracks
|
||||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||||
@ -51,26 +52,22 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||||
|
|
||||||
def test_youtube_channel_matching(self):
|
def test_youtube_channel_matching(self):
|
||||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
|
|
||||||
def test_youtube_user_matching(self):
|
# def test_youtube_user_matching(self):
|
||||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||||
|
|
||||||
def test_youtube_feeds(self):
|
def test_youtube_feeds(self):
|
||||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
||||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
||||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
|
||||||
|
|
||||||
def test_youtube_show_matching(self):
|
# def test_youtube_search_matching(self):
|
||||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
|
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
def test_youtube_search_matching(self):
|
|
||||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
|
||||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
|
@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(d['x'], 1)
|
self.assertEqual(d['x'], 1)
|
||||||
self.assertEqual(d['y'], 'a')
|
self.assertEqual(d['y'], 'a')
|
||||||
|
|
||||||
|
# Just drop ! prefix for now though this results in a wrong value
|
||||||
|
on = js_to_json('''{
|
||||||
|
a: !0,
|
||||||
|
b: !1,
|
||||||
|
c: !!0,
|
||||||
|
d: !!42.42,
|
||||||
|
e: !!![],
|
||||||
|
f: !"abc",
|
||||||
|
g: !"",
|
||||||
|
!42: 42
|
||||||
|
}''')
|
||||||
|
self.assertEqual(json.loads(on), {
|
||||||
|
'a': 0,
|
||||||
|
'b': 1,
|
||||||
|
'c': 0,
|
||||||
|
'd': 42.42,
|
||||||
|
'e': [],
|
||||||
|
'f': "abc",
|
||||||
|
'g': "",
|
||||||
|
'42': 42
|
||||||
|
})
|
||||||
|
|
||||||
on = js_to_json('["abc", "def",]')
|
on = js_to_json('["abc", "def",]')
|
||||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||||
|
|
||||||
|
@ -793,21 +793,14 @@ class YoutubeDL(object):
|
|||||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||||
'and will probably not work.')
|
'and will probably not work.')
|
||||||
|
|
||||||
|
return self.__extract_info(url, ie, download, extra_info, process)
|
||||||
|
else:
|
||||||
|
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||||
|
|
||||||
|
def __handle_extraction_exceptions(func):
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
ie_result = ie.extract(url)
|
return func(self, *args, **kwargs)
|
||||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
|
||||||
break
|
|
||||||
if isinstance(ie_result, list):
|
|
||||||
# Backwards compatibility: old IE result format
|
|
||||||
ie_result = {
|
|
||||||
'_type': 'compat_list',
|
|
||||||
'entries': ie_result,
|
|
||||||
}
|
|
||||||
self.add_default_extra_info(ie_result, ie, url)
|
|
||||||
if process:
|
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
|
||||||
else:
|
|
||||||
return ie_result
|
|
||||||
except GeoRestrictedError as e:
|
except GeoRestrictedError as e:
|
||||||
msg = e.msg
|
msg = e.msg
|
||||||
if e.countries:
|
if e.countries:
|
||||||
@ -815,20 +808,33 @@ class YoutubeDL(object):
|
|||||||
map(ISO3166Utils.short2full, e.countries))
|
map(ISO3166Utils.short2full, e.countries))
|
||||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
self.report_error(msg)
|
self.report_error(msg)
|
||||||
break
|
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
self.report_error(compat_str(e), e.format_traceback())
|
||||||
break
|
|
||||||
except MaxDownloadsReached:
|
except MaxDownloadsReached:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.params.get('ignoreerrors', False):
|
if self.params.get('ignoreerrors', False):
|
||||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __extract_info(self, url, ie, download, extra_info, process):
|
||||||
|
ie_result = ie.extract(url)
|
||||||
|
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||||
|
return
|
||||||
|
if isinstance(ie_result, list):
|
||||||
|
# Backwards compatibility: old IE result format
|
||||||
|
ie_result = {
|
||||||
|
'_type': 'compat_list',
|
||||||
|
'entries': ie_result,
|
||||||
|
}
|
||||||
|
self.add_default_extra_info(ie_result, ie, url)
|
||||||
|
if process:
|
||||||
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
return ie_result
|
||||||
|
|
||||||
def add_default_extra_info(self, ie_result, ie, url):
|
def add_default_extra_info(self, ie_result, ie, url):
|
||||||
self.add_extra_info(ie_result, {
|
self.add_extra_info(ie_result, {
|
||||||
@ -1003,9 +1009,8 @@ class YoutubeDL(object):
|
|||||||
self.to_screen('[download] ' + reason)
|
self.to_screen('[download] ' + reason)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||||
download=download,
|
# TODO: skip failed (empty) entries?
|
||||||
extra_info=extra)
|
|
||||||
playlist_results.append(entry_result)
|
playlist_results.append(entry_result)
|
||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||||
@ -1034,6 +1039,11 @@ class YoutubeDL(object):
|
|||||||
else:
|
else:
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __process_iterable_entry(self, entry, download, extra_info):
|
||||||
|
return self.process_ie_result(
|
||||||
|
entry, download=download, extra_info=extra_info)
|
||||||
|
|
||||||
def _build_format_filter(self, filter_spec):
|
def _build_format_filter(self, filter_spec):
|
||||||
" Returns a function to filter the formats according to the filter_spec "
|
" Returns a function to filter the formats according to the filter_spec "
|
||||||
|
|
||||||
|
@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4
|
|||||||
|
|
||||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||||
# and uniform cross-version exceptiong handling
|
# and uniform cross-version exception handling
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
|
|||||||
try:
|
try:
|
||||||
ctx.data = self.ydl.urlopen(request)
|
ctx.data = self.ydl.urlopen(request)
|
||||||
except (compat_urllib_error.URLError, ) as err:
|
except (compat_urllib_error.URLError, ) as err:
|
||||||
if isinstance(err.reason, socket.timeout):
|
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||||
|
reason = getattr(err, 'reason', None)
|
||||||
|
if isinstance(reason, socket.timeout):
|
||||||
raise RetryDownload(err)
|
raise RetryDownload(err)
|
||||||
raise err
|
raise err
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
|
103
youtube_dl/extractor/amara.py
Normal file
103
youtube_dl/extractor/amara.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AmaraIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# Youtube
|
||||||
|
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||||
|
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'h6ZuVdvYnfE',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Why jury trials are becoming less common',
|
||||||
|
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20160813',
|
||||||
|
'uploader': 'PBS NewsHour',
|
||||||
|
'uploader_id': 'PBSNewsHour',
|
||||||
|
'timestamp': 1549639570,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Vimeo
|
||||||
|
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||||
|
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '18622084',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Vimeo at CES 2011!',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'timestamp': 1294763658,
|
||||||
|
'upload_date': '20110111',
|
||||||
|
'uploader': 'Sam Morrill',
|
||||||
|
'uploader_id': 'sammorrill'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Direct Link
|
||||||
|
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||||
|
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's8KL7I3jLmh6',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The danger of a single story',
|
||||||
|
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20091007',
|
||||||
|
'timestamp': 1254942511,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
meta = self._download_json(
|
||||||
|
'https://amara.org/api/videos/%s/' % video_id,
|
||||||
|
video_id, query={'format': 'json'})
|
||||||
|
title = meta['title']
|
||||||
|
video_url = meta['all_urls'][0]
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for language in (meta.get('languages') or []):
|
||||||
|
subtitles_uri = language.get('subtitles_uri')
|
||||||
|
if not (subtitles_uri and language.get('published')):
|
||||||
|
continue
|
||||||
|
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||||
|
for f in ('json', 'srt', 'vtt'):
|
||||||
|
subtitle.append({
|
||||||
|
'ext': f,
|
||||||
|
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||||
|
})
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'title': title,
|
||||||
|
'description': meta.get('description'),
|
||||||
|
'thumbnail': meta.get('thumbnail'),
|
||||||
|
'duration': int_or_none(meta.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(meta.get('created')),
|
||||||
|
}
|
||||||
|
|
||||||
|
for ie in (YoutubeIE, VimeoIE):
|
||||||
|
if ie.suitable(video_url):
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
break
|
||||||
|
|
||||||
|
return info
|
@ -4,23 +4,57 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
|
||||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
|
||||||
# add tests.
|
|
||||||
|
|
||||||
|
|
||||||
class ArteTVBaseIE(InfoExtractor):
|
class ArteTVBaseIE(InfoExtractor):
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||||
info = self._download_json(json_url, video_id)
|
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVIE(ArteTVBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||||
|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||||
|
)
|
||||||
|
/(?P<id>\d{6}-\d{3}-[AF])
|
||||||
|
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '088501-000-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mexico: Stealing Petrol to Survive',
|
||||||
|
'upload_date': '20190628',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||||
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
if not upload_date_str:
|
if not upload_date_str:
|
||||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||||
|
|
||||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||||
subtitle = player_info.get('VSU', '').strip()
|
subtitle = player_info.get('VSU', '').strip()
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
info_dict = {
|
|
||||||
'id': player_info['VID'],
|
|
||||||
'title': title,
|
|
||||||
'description': player_info.get('VDE'),
|
|
||||||
'upload_date': unified_strdate(upload_date_str),
|
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
||||||
}
|
|
||||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||||
|
|
||||||
LANGS = {
|
LANGS = {
|
||||||
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for format_id, format_dict in vsr.items():
|
for format_id, format_dict in vsr.items():
|
||||||
f = dict(format_dict)
|
f = dict(format_dict)
|
||||||
|
format_url = url_or_none(f.get('url'))
|
||||||
|
streamer = f.get('streamer')
|
||||||
|
if not format_url and not streamer:
|
||||||
|
continue
|
||||||
versionCode = f.get('versionCode')
|
versionCode = f.get('versionCode')
|
||||||
l = re.escape(langcode)
|
l = re.escape(langcode)
|
||||||
|
|
||||||
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
lang_pref = -1
|
lang_pref = -1
|
||||||
|
|
||||||
|
media_type = f.get('mediaType')
|
||||||
|
if media_type == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
for m3u8_format in m3u8_formats:
|
||||||
|
m3u8_format['language_preference'] = lang_pref
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
continue
|
||||||
|
|
||||||
format = {
|
format = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||||
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
'quality': qfunc(f.get('quality')),
|
'quality': qfunc(f.get('quality')),
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.get('mediaType') == 'rtmp':
|
if media_type == 'rtmp':
|
||||||
format['url'] = f['streamer']
|
format['url'] = f['streamer']
|
||||||
format['play_path'] = 'mp4:' + f['url']
|
format['play_path'] = 'mp4:' + f['url']
|
||||||
format['ext'] = 'flv'
|
format['ext'] = 'flv'
|
||||||
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
|
|
||||||
self._check_formats(formats, video_id)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
return {
|
||||||
return info_dict
|
'id': player_info.get('VID') or video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': player_info.get('VDE'),
|
||||||
|
'upload_date': unified_strdate(upload_date_str),
|
||||||
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
class ArteTVEmbedIE(InfoExtractor):
|
||||||
IE_NAME = 'arte.tv:+7'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '088501-000-A',
|
'id': '100605-013-A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Mexico: Stealing Petrol to Survive',
|
'title': 'United we Stream November Lockdown Edition #13',
|
||||||
'upload_date': '20190628',
|
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||||
|
'upload_date': '20201116',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
@staticmethod
|
||||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
def _extract_urls(webpage):
|
||||||
return self._extract_from_json_url(
|
return [url for _, url in re.findall(
|
||||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||||
video_id, lang)
|
webpage)]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
|
||||||
IE_NAME = 'arte.tv:embed'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https://www\.arte\.tv
|
|
||||||
/player/v3/index\.php\?json_url=
|
|
||||||
(?P<json_url>
|
|
||||||
https?://api\.arte\.tv/api/player/v1/config/
|
|
||||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
json_url = qs['json_url'][0]
|
||||||
|
video_id = ArteTVIE._match_id(json_url)
|
||||||
|
return self.url_result(
|
||||||
|
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
IE_NAME = 'arte.tv:playlist'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
|||||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
collection = self._download_json(
|
collection = self._download_json(
|
||||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
'%s/collectionData/%s/%s?source=videos'
|
||||||
% (lang, playlist_id), playlist_id)
|
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||||
|
entries = []
|
||||||
|
for video in collection['videos']:
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
video_id = video.get('programId')
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'alt_title': video.get('subtitle'),
|
||||||
|
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||||
|
'duration': int_or_none(video.get('durationSeconds')),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'ie_key': ArteTVIE.ie_key(),
|
||||||
|
})
|
||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
entries = [
|
|
||||||
self._extract_from_json_url(
|
|
||||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
|
||||||
for video in collection['videos'] if video.get('jsonUrl')]
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import random
|
import random
|
||||||
@ -5,10 +6,7 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
@ -17,30 +15,32 @@ from ..utils import (
|
|||||||
parse_filesize,
|
parse_filesize,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1812978515',
|
'id': '1812978515',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
|
||||||
'duration': 9.8485,
|
'duration': 9.8485,
|
||||||
|
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||||
|
'upload_date': '20121129',
|
||||||
|
'timestamp': 1354224127,
|
||||||
},
|
},
|
||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
}, {
|
||||||
# free download
|
# free download
|
||||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2650410135',
|
'id': '2650410135',
|
||||||
'ext': 'aiff',
|
'ext': 'aiff',
|
||||||
@ -79,11 +79,16 @@ class BandcampIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||||
|
return self._parse_json(self._html_search_regex(
|
||||||
|
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||||
|
attr + ' data', group=2), video_id, fatal=fatal)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
title = self._match_id(url)
|
||||||
title = mobj.group('title')
|
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
tralbum = self._extract_data_attr(webpage, title)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
track_id = None
|
track_id = None
|
||||||
track = None
|
track = None
|
||||||
@ -91,10 +96,7 @@ class BandcampIE(InfoExtractor):
|
|||||||
duration = None
|
duration = None
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
track_info = self._parse_json(
|
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
|
||||||
self._search_regex(
|
|
||||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
|
||||||
webpage, 'track info', default='{}'), title)
|
|
||||||
if track_info:
|
if track_info:
|
||||||
file_ = track_info.get('file')
|
file_ = track_info.get('file')
|
||||||
if isinstance(file_, dict):
|
if isinstance(file_, dict):
|
||||||
@ -111,37 +113,25 @@ class BandcampIE(InfoExtractor):
|
|||||||
'abr': int_or_none(abr_str),
|
'abr': int_or_none(abr_str),
|
||||||
})
|
})
|
||||||
track = track_info.get('title')
|
track = track_info.get('title')
|
||||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
track_id = str_or_none(
|
||||||
|
track_info.get('track_id') or track_info.get('id'))
|
||||||
track_number = int_or_none(track_info.get('track_num'))
|
track_number = int_or_none(track_info.get('track_num'))
|
||||||
duration = float_or_none(track_info.get('duration'))
|
duration = float_or_none(track_info.get('duration'))
|
||||||
|
|
||||||
def extract(key):
|
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||||
return self._search_regex(
|
current = tralbum.get('current') or {}
|
||||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||||
webpage, key, default=None, group='value')
|
|
||||||
|
|
||||||
artist = extract('artist')
|
|
||||||
album = extract('album_title')
|
|
||||||
timestamp = unified_timestamp(
|
timestamp = unified_timestamp(
|
||||||
extract('publish_date') or extract('album_publish_date'))
|
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||||
release_date = unified_strdate(extract('album_release_date'))
|
|
||||||
|
|
||||||
download_link = self._search_regex(
|
download_link = tralbum.get('freeDownloadPage')
|
||||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
|
||||||
'download link', default=None, group='url')
|
|
||||||
if download_link:
|
if download_link:
|
||||||
track_id = self._search_regex(
|
track_id = compat_str(tralbum['id'])
|
||||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
|
||||||
webpage, 'track id')
|
|
||||||
|
|
||||||
download_webpage = self._download_webpage(
|
download_webpage = self._download_webpage(
|
||||||
download_link, track_id, 'Downloading free downloads page')
|
download_link, track_id, 'Downloading free downloads page')
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
track_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
info = try_get(
|
info = try_get(
|
||||||
blob, (lambda x: x['digital_items'][0],
|
blob, (lambda x: x['digital_items'][0],
|
||||||
@ -207,20 +197,20 @@ class BandcampIE(InfoExtractor):
|
|||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': artist,
|
'uploader': artist,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'release_date': release_date,
|
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'track': track,
|
'track': track,
|
||||||
'track_number': track_number,
|
'track_number': track_number,
|
||||||
'track_id': track_id,
|
'track_id': track_id,
|
||||||
'artist': artist,
|
'artist': artist,
|
||||||
'album': album,
|
'album': embed.get('album_title'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
@ -230,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1353101989',
|
'id': '1353101989',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Intro',
|
'title': 'Blazo - Intro',
|
||||||
|
'timestamp': 1311756226,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -238,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38097443',
|
'id': '38097443',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
'timestamp': 1311757238,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@ -274,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'title': '"Entropy" EP',
|
'title': '"Entropy" EP',
|
||||||
'uploader_id': 'jstrecords',
|
'uploader_id': 'jstrecords',
|
||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
|
'description': 'md5:0ff22959c943622972596062f2f366a5',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}, {
|
}, {
|
||||||
@ -283,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
'id': 'we-are-the-plague',
|
'id': 'we-are-the-plague',
|
||||||
'title': 'WE ARE THE PLAGUE',
|
'title': 'WE ARE THE PLAGUE',
|
||||||
'uploader_id': 'insulters',
|
'uploader_id': 'insulters',
|
||||||
|
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
@ -294,41 +292,34 @@ class BandcampAlbumIE(InfoExtractor):
|
|||||||
else super(BandcampAlbumIE, cls).suitable(url))
|
else super(BandcampAlbumIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
|
||||||
uploader_id = mobj.group('subdomain')
|
|
||||||
album_id = mobj.group('album_id')
|
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
track_elements = re.findall(
|
tralbum = self._extract_data_attr(webpage, playlist_id)
|
||||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
track_info = tralbum.get('trackinfo')
|
||||||
if not track_elements:
|
if not track_info:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
# Only tracks with duration info have songs
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
compat_urlparse.urljoin(url, t_path),
|
urljoin(url, t['title_link']), BandcampIE.ie_key(),
|
||||||
ie=BandcampIE.ie_key(),
|
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
|
||||||
video_title=self._search_regex(
|
for t in track_info
|
||||||
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
if t.get('duration')]
|
||||||
elem_content, 'track title', fatal=False))
|
|
||||||
for elem_content, t_path in track_elements
|
current = tralbum.get('current') or {}
|
||||||
if self._html_search_meta('duration', elem_content, default=None)]
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
|
||||||
webpage, 'title', fatal=False)
|
|
||||||
if title:
|
|
||||||
title = title.replace(r'\"', '"')
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'title': title,
|
'title': current.get('title'),
|
||||||
|
'description': current.get('about'),
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampWeeklyIE(InfoExtractor):
|
class BandcampWeeklyIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:weekly'
|
IE_NAME = 'Bandcamp:weekly'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -343,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
|
|||||||
'release_date': '20170404',
|
'release_date': '20170404',
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': 'Magic Moments',
|
'episode': 'Magic Moments',
|
||||||
'episode_number': 208,
|
|
||||||
'episode_id': '224',
|
'episode_id': '224',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'opus-lo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, show_id)
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(webpage, show_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
video_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
show = blob['bcw_show']
|
show = blob['bcw_data'][show_id]
|
||||||
|
|
||||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
|
||||||
# which happens to expose the latest Bandcamp Weekly episode.
|
|
||||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in show['audio_stream'].items():
|
for format_id, format_url in show['audio_stream'].items():
|
||||||
@ -390,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
|
|||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
episode_number = None
|
|
||||||
seq = blob.get('bcw_seq')
|
|
||||||
|
|
||||||
if seq and isinstance(seq, list):
|
|
||||||
try:
|
|
||||||
episode_number = next(
|
|
||||||
int_or_none(e.get('episode_number'))
|
|
||||||
for e in seq
|
|
||||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': show_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': show.get('desc') or show.get('short_desc'),
|
'description': show.get('desc') or show.get('short_desc'),
|
||||||
'duration': float_or_none(show.get('audio_duration')),
|
'duration': float_or_none(show.get('audio_duration')),
|
||||||
@ -411,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
|
|||||||
'release_date': unified_strdate(show.get('published_date')),
|
'release_date': unified_strdate(show.get('published_date')),
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': show.get('subtitle'),
|
'episode': show.get('subtitle'),
|
||||||
'episode_number': episode_number,
|
'episode_id': show_id,
|
||||||
'episode_id': compat_str(video_id),
|
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
98
youtube_dl/extractor/box.py
Normal file
98
youtube_dl/extractor/box.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_iso8601,
|
||||||
|
# try_get,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BoxIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||||
|
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '510727257538',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
|
||||||
|
'uploader': 'MLS Video',
|
||||||
|
'timestamp': 1566320259,
|
||||||
|
'upload_date': '20190820',
|
||||||
|
'uploader_id': '235196876',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
shared_name, file_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
webpage = self._download_webpage(url, file_id)
|
||||||
|
request_token = self._parse_json(self._search_regex(
|
||||||
|
r'Box\.config\s*=\s*({.+?});', webpage,
|
||||||
|
'Box config'), file_id)['requestToken']
|
||||||
|
access_token = self._download_json(
|
||||||
|
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||||
|
'Downloading token JSON metadata',
|
||||||
|
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'X-Request-Token': request_token,
|
||||||
|
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||||
|
})[file_id]['read']
|
||||||
|
shared_link = 'https://app.box.com/s/' + shared_name
|
||||||
|
f = self._download_json(
|
||||||
|
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||||
|
'Downloading file JSON metadata', headers={
|
||||||
|
'Authorization': 'Bearer ' + access_token,
|
||||||
|
'BoxApi': 'shared_link=' + shared_link,
|
||||||
|
'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
|
||||||
|
}, query={
|
||||||
|
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
|
||||||
|
})
|
||||||
|
title = f['name']
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'access_token': access_token,
|
||||||
|
'shared_link': shared_link
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
|
||||||
|
# entry_url_template = try_get(
|
||||||
|
# entry, lambda x: x['content']['url_template'])
|
||||||
|
# if not entry_url_template:
|
||||||
|
# continue
|
||||||
|
# representation = entry.get('representation')
|
||||||
|
# if representation == 'dash':
|
||||||
|
# TODO: append query to every fragment URL
|
||||||
|
# formats.extend(self._extract_mpd_formats(
|
||||||
|
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
|
||||||
|
# file_id, query=query))
|
||||||
|
|
||||||
|
authenticated_download_url = f.get('authenticated_download_url')
|
||||||
|
if authenticated_download_url and f.get('is_download_available'):
|
||||||
|
formats.append({
|
||||||
|
'ext': f.get('extension') or determine_ext(title),
|
||||||
|
'filesize': f.get('size'),
|
||||||
|
'format_id': 'download',
|
||||||
|
'url': update_url_query(authenticated_download_url, query),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
creator = f.get('created_by') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': file_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': f.get('description') or None,
|
||||||
|
'uploader': creator.get('name'),
|
||||||
|
'timestamp': parse_iso8601(f.get('created_at')),
|
||||||
|
'uploader_id': creator.get('id'),
|
||||||
|
}
|
@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url(cls, object_str):
|
def _build_brightcove_url(cls, object_str):
|
||||||
"""
|
"""
|
||||||
Build a Brightcove url from a xml string containing
|
Build a Brightcove url from a xml string containing
|
||||||
<object class="BrightcoveExperience">{params}</object>
|
<object class="BrightcoveExperience">{params}</object>
|
||||||
@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
return cls._make_brightcove_url(params)
|
return cls._make_brightcove_url(params)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _build_brighcove_url_from_js(cls, object_js):
|
def _build_brightcove_url_from_js(cls, object_js):
|
||||||
# The layout of JS is as follows:
|
# The layout of JS is as follows:
|
||||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||||
# // build Brightcove <object /> XML
|
# // build Brightcove <object /> XML
|
||||||
@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
).+?>\s*</object>''',
|
).+?>\s*</object>''',
|
||||||
webpage)
|
webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
|
||||||
|
|
||||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||||
if matches:
|
if matches:
|
||||||
return list(filter(None, [
|
return list(filter(None, [
|
||||||
cls._build_brighcove_url_from_js(custom_bc)
|
cls._build_brightcove_url_from_js(custom_bc)
|
||||||
for custom_bc in matches]))
|
for custom_bc in matches]))
|
||||||
return [src for _, src in re.findall(
|
return [src for _, src in re.findall(
|
||||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class CNBCVideoIE(InfoExtractor):
|
class CNBCVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
video_id = self._download_json(
|
||||||
video_id = self._search_regex(
|
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
'query': '''{
|
||||||
'video id')
|
page(path: "%s") {
|
||||||
|
vcpsId
|
||||||
|
}
|
||||||
|
}''' % path,
|
||||||
|
})['data']['page']['vcpsId']
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||||
CNBCIE.ie_key())
|
CNBCIE.ie_key())
|
||||||
|
@ -1456,9 +1456,10 @@ class InfoExtractor(object):
|
|||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||||
return True
|
return True
|
||||||
except ExtractorError:
|
except ExtractorError as e:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
'%s: %s URL is invalid, skipping: %s'
|
||||||
|
% (video_id, item, error_to_compat_str(e.cause)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def http_scheme(self):
|
def http_scheme(self):
|
||||||
@ -1663,7 +1664,7 @@ class InfoExtractor(object):
|
|||||||
# just the media without qualities renditions.
|
# just the media without qualities renditions.
|
||||||
# Fortunately, master playlist can be easily distinguished from media
|
# Fortunately, master playlist can be easily distinguished from media
|
||||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||||
# media playlist and MUST NOT appear in master playlist thus we can
|
# media playlist and MUST NOT appear in master playlist thus we can
|
||||||
# clearly detect media playlist with this criterion.
|
# clearly detect media playlist with this criterion.
|
||||||
@ -2595,6 +2596,7 @@ class InfoExtractor(object):
|
|||||||
|
|
||||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
hdcore_sign = 'hdcore=3.7.0'
|
hdcore_sign = 'hdcore=3.7.0'
|
||||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||||
hds_host = hosts.get('hds')
|
hds_host = hosts.get('hds')
|
||||||
@ -2607,6 +2609,7 @@ class InfoExtractor(object):
|
|||||||
for entry in f4m_formats:
|
for entry in f4m_formats:
|
||||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
|
|
||||||
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||||
hls_host = hosts.get('hls')
|
hls_host = hosts.get('hls')
|
||||||
if hls_host:
|
if hls_host:
|
||||||
@ -2614,6 +2617,31 @@ class InfoExtractor(object):
|
|||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
http_host = hosts.get('http')
|
||||||
|
if http_host and 'hdnea=' not in manifest_url:
|
||||||
|
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
|
||||||
|
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||||
|
qualities_length = len(qualities)
|
||||||
|
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
|
||||||
|
i = 0
|
||||||
|
http_formats = []
|
||||||
|
for f in formats:
|
||||||
|
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
|
||||||
|
for protocol in ('http', 'https'):
|
||||||
|
http_f = f.copy()
|
||||||
|
del http_f['manifest_url']
|
||||||
|
http_url = re.sub(
|
||||||
|
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
|
||||||
|
http_f.update({
|
||||||
|
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||||
|
'url': http_url,
|
||||||
|
'protocol': protocol,
|
||||||
|
})
|
||||||
|
http_formats.append(http_f)
|
||||||
|
i += 1
|
||||||
|
formats.extend(http_formats)
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
|
@ -16,6 +16,8 @@ from ..utils import (
|
|||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'uploader': 'gq',
|
'uploader': 'gq',
|
||||||
'upload_date': '20170321',
|
'upload_date': '20170321',
|
||||||
'timestamp': 1490126427,
|
'timestamp': 1490126427,
|
||||||
|
'description': 'How much grimmer would things be if these people were competent?',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||||
'uploader': 'arstechnica',
|
'uploader': 'arstechnica',
|
||||||
'upload_date': '20150916',
|
'upload_date': '20150916',
|
||||||
'timestamp': 1442434955,
|
'timestamp': 1442434920,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||||
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for t, caption in video_info.get('captions', {}).items():
|
||||||
|
caption_url = caption.get('src')
|
||||||
|
if not (t in ('vtt', 'srt', 'tml') and caption_url):
|
||||||
|
continue
|
||||||
|
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'season': video_info.get('season_title'),
|
'season': video_info.get('season_title'),
|
||||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
'categories': video_info.get('categories'),
|
'categories': video_info.get('categories'),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
|
|||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
else:
|
else:
|
||||||
params = self._extract_video_params(webpage, display_id)
|
video = try_get(self._parse_json(self._search_regex(
|
||||||
info = self._search_json_ld(
|
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||||
webpage, display_id, fatal=False)
|
'preload state', '{}'), display_id),
|
||||||
|
lambda x: x['transformed']['video'])
|
||||||
|
if video:
|
||||||
|
params = {'videoId': video['id']}
|
||||||
|
info = {'description': strip_or_none(video.get('description'))}
|
||||||
|
else:
|
||||||
|
params = self._extract_video_params(webpage, display_id)
|
||||||
|
info = self._search_json_ld(
|
||||||
|
webpage, display_id, fatal=False)
|
||||||
info.update(self._extract_video(params))
|
info.update(self._extract_video(params))
|
||||||
return info
|
return info
|
||||||
|
@ -7,7 +7,7 @@ from .dplay import DPlayIE
|
|||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(DPlayIE):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||||
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
|
|||||||
|
|
||||||
title = get_item('title', preferred_langs) or video_id
|
title = get_item('title', preferred_langs) or video_id
|
||||||
description = get_item('description', preferred_langs)
|
description = get_item('description', preferred_langs)
|
||||||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
||||||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
||||||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
||||||
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnmail,
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
|
@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
|
|||||||
from .airmozilla import AirMozillaIE
|
from .airmozilla import AirMozillaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
|
from .amara import AmaraIE
|
||||||
from .amcnetworks import AMCNetworksIE
|
from .amcnetworks import AMCNetworksIE
|
||||||
from .americastestkitchen import AmericasTestKitchenIE
|
from .americastestkitchen import AmericasTestKitchenIE
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
@ -58,7 +59,7 @@ from .ard import (
|
|||||||
ARDMediathekIE,
|
ARDMediathekIE,
|
||||||
)
|
)
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTVPlus7IE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
@ -121,6 +122,7 @@ from .blinkx import BlinkxIE
|
|||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bostonglobe import BostonGlobeIE
|
from .bostonglobe import BostonGlobeIE
|
||||||
|
from .box import BoxIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
from .br import (
|
from .br import (
|
||||||
BRIE,
|
BRIE,
|
||||||
@ -529,6 +531,7 @@ from .laola1tv import (
|
|||||||
EHFTVIE,
|
EHFTVIE,
|
||||||
ITTFIE,
|
ITTFIE,
|
||||||
)
|
)
|
||||||
|
from .lbry import LBRYIE
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
from .lcp import (
|
from .lcp import (
|
||||||
LcpPlayIE,
|
LcpPlayIE,
|
||||||
@ -780,6 +783,7 @@ from .ntvru import NTVRuIE
|
|||||||
from .nytimes import (
|
from .nytimes import (
|
||||||
NYTimesIE,
|
NYTimesIE,
|
||||||
NYTimesArticleIE,
|
NYTimesArticleIE,
|
||||||
|
NYTimesCookingIE,
|
||||||
)
|
)
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .nzz import NZZIE
|
from .nzz import NZZIE
|
||||||
@ -842,6 +846,10 @@ from .picarto import (
|
|||||||
)
|
)
|
||||||
from .piksel import PikselIE
|
from .piksel import PikselIE
|
||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
|
from .pinterest import (
|
||||||
|
PinterestIE,
|
||||||
|
PinterestCollectionIE,
|
||||||
|
)
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .platzi import (
|
from .platzi import (
|
||||||
PlatziIE,
|
PlatziIE,
|
||||||
@ -957,6 +965,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
|
|||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
from .rtvs import RTVSIE
|
from .rtvs import RTVSIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
|
from .rumble import RumbleEmbedIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
@ -1007,6 +1016,16 @@ from .shared import (
|
|||||||
from .showroomlive import ShowRoomLiveIE
|
from .showroomlive import ShowRoomLiveIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .sixplay import SixPlayIE
|
from .sixplay import SixPlayIE
|
||||||
|
from .skyit import (
|
||||||
|
SkyItPlayerIE,
|
||||||
|
SkyItVideoIE,
|
||||||
|
SkyItVideoLiveIE,
|
||||||
|
SkyItIE,
|
||||||
|
SkyItAcademyIE,
|
||||||
|
SkyItArteIE,
|
||||||
|
CieloTVItIE,
|
||||||
|
TV8ItIE,
|
||||||
|
)
|
||||||
from .skylinewebcams import SkylineWebcamsIE
|
from .skylinewebcams import SkylineWebcamsIE
|
||||||
from .skynewsarabia import (
|
from .skynewsarabia import (
|
||||||
SkyNewsArabiaIE,
|
SkyNewsArabiaIE,
|
||||||
@ -1053,8 +1072,7 @@ from .spankbang import (
|
|||||||
SpankBangPlaylistIE,
|
SpankBangPlaylistIE,
|
||||||
)
|
)
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
|
||||||
from .spike import (
|
from .spike import (
|
||||||
BellatorIE,
|
BellatorIE,
|
||||||
ParamountNetworkIE,
|
ParamountNetworkIE,
|
||||||
@ -1357,7 +1375,6 @@ from .vk import (
|
|||||||
from .vlive import (
|
from .vlive import (
|
||||||
VLiveIE,
|
VLiveIE,
|
||||||
VLiveChannelIE,
|
VLiveChannelIE,
|
||||||
VLivePlaylistIE
|
|
||||||
)
|
)
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .vodpl import VODPlIE
|
from .vodpl import VODPlIE
|
||||||
@ -1474,21 +1491,17 @@ from .yourporn import YourPornIE
|
|||||||
from .yourupload import YourUploadIE
|
from .yourupload import YourUploadIE
|
||||||
from .youtube import (
|
from .youtube import (
|
||||||
YoutubeIE,
|
YoutubeIE,
|
||||||
YoutubeChannelIE,
|
|
||||||
YoutubeFavouritesIE,
|
|
||||||
YoutubeHistoryIE,
|
YoutubeHistoryIE,
|
||||||
YoutubeLiveIE,
|
YoutubeTabIE,
|
||||||
YoutubePlaylistIE,
|
YoutubePlaylistIE,
|
||||||
YoutubePlaylistsIE,
|
|
||||||
YoutubeRecommendedIE,
|
YoutubeRecommendedIE,
|
||||||
YoutubeSearchDateIE,
|
YoutubeSearchDateIE,
|
||||||
YoutubeSearchIE,
|
YoutubeSearchIE,
|
||||||
YoutubeSearchURLIE,
|
#YoutubeSearchURLIE,
|
||||||
YoutubeShowIE,
|
|
||||||
YoutubeSubscriptionsIE,
|
YoutubeSubscriptionsIE,
|
||||||
YoutubeTruncatedIDIE,
|
YoutubeTruncatedIDIE,
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
YoutubeUserIE,
|
YoutubeYtUserIE,
|
||||||
YoutubeWatchLaterIE,
|
YoutubeWatchLaterIE,
|
||||||
)
|
)
|
||||||
from .zapiks import ZapiksIE
|
from .zapiks import ZapiksIE
|
||||||
|
@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
|
|||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'upload_date': '20160907',
|
'upload_date': '20160907',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
|
|||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||||
|
|
||||||
upload_date_str = self._search_regex(
|
upload_date_str = self._search_regex(
|
||||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||||
@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -17,6 +17,7 @@ from ..utils import (
|
|||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
|
|
||||||
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
|
|||||||
|
|
||||||
is_live = None
|
is_live = None
|
||||||
|
|
||||||
formats = []
|
videos = []
|
||||||
for video in info['videos']:
|
|
||||||
if video['statut'] != 'ONLINE':
|
for video in (info.get('videos') or []):
|
||||||
|
if video.get('statut') != 'ONLINE':
|
||||||
continue
|
continue
|
||||||
video_url = video['url']
|
if not video.get('url'):
|
||||||
|
continue
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
|
if not videos:
|
||||||
|
for device_type in ['desktop', 'mobile']:
|
||||||
|
fallback_info = self._download_json(
|
||||||
|
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||||
|
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
||||||
|
'device_type': device_type,
|
||||||
|
'browser': 'chrome',
|
||||||
|
}, fatal=False)
|
||||||
|
|
||||||
|
if fallback_info and fallback_info.get('video'):
|
||||||
|
videos.append(fallback_info['video'])
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video in videos:
|
||||||
|
video_url = video.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
if is_live is None:
|
if is_live is None:
|
||||||
is_live = (try_get(
|
is_live = (try_get(
|
||||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
|
||||||
bool) is True) or '/live.francetv.fr/' in video_url
|
or video.get('is_live') is True
|
||||||
format_id = video['format']
|
or '/live.francetv.fr/' in video_url)
|
||||||
|
format_id = video.get('format')
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
if georestricted:
|
if georestricted:
|
||||||
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
|
|||||||
sign(video_url, format_id), video_id, 'mp4',
|
sign(video_url, format_id), video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||||
fatal=False))
|
fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||||
elif video_url.startswith('rtmp'):
|
elif video_url.startswith('rtmp'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = info['titre']
|
title = info['titre']
|
||||||
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(title) if is_live else title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'description': clean_html(info['synopsis']),
|
'description': clean_html(info.get('synopsis')),
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
@ -91,6 +91,7 @@ from .piksel import PikselIE
|
|||||||
from .videa import VideaIE
|
from .videa import VideaIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
|
from .arte import ArteTVEmbedIE
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .rutube import RutubeIE
|
from .rutube import RutubeIE
|
||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
@ -841,7 +842,7 @@ class GenericIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# MTVSercices embed
|
# MTVServices embed
|
||||||
{
|
{
|
||||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||||
@ -2760,11 +2761,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded arte.tv player
|
# Look for embedded arte.tv player
|
||||||
mobj = re.search(
|
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
|
||||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
if arte_urls:
|
||||||
webpage)
|
return self.playlist_from_matches(arte_urls, video_id, video_title)
|
||||||
if mobj is not None:
|
|
||||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
|
||||||
|
|
||||||
# Look for embedded francetv player
|
# Look for embedded francetv player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_parse_qs
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
try_get,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
# video can't be watched anonymously due to view count limit reached,
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
# video id is longer than 28 characters
|
# video id is longer than 28 characters
|
||||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||||
'info_dict': {
|
|
||||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
|
||||||
'duration': 189,
|
|
||||||
},
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||||
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
video_info = compat_parse_qs(self._download_webpage(
|
||||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
'https://drive.google.com/get_video_info',
|
||||||
|
video_id, query={'docid': video_id}))
|
||||||
|
|
||||||
title = self._search_regex(
|
def get_value(key):
|
||||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
return try_get(video_info, lambda x: x[key][0])
|
||||||
default=None) or self._og_search_title(webpage)
|
|
||||||
duration = int_or_none(self._search_regex(
|
reason = get_value('reason')
|
||||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
title = get_value('title')
|
||||||
default=None))
|
if not title and reason:
|
||||||
|
raise ExtractorError(reason, expected=True)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
fmt_stream_map = self._search_regex(
|
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||||
'fmt stream map', default='').split(',')
|
|
||||||
fmt_list = self._search_regex(
|
|
||||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
|
||||||
'fmt_list', default='').split(',')
|
|
||||||
if fmt_stream_map and fmt_list:
|
if fmt_stream_map and fmt_list:
|
||||||
resolutions = {}
|
resolutions = {}
|
||||||
for fmt in fmt_list:
|
for fmt in fmt_list:
|
||||||
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
if urlh and urlh.headers.get('Content-Disposition'):
|
if urlh and urlh.headers.get('Content-Disposition'):
|
||||||
add_source_format(urlh)
|
add_source_format(urlh)
|
||||||
|
|
||||||
if not formats:
|
if not formats and reason:
|
||||||
reason = self._search_regex(
|
raise ExtractorError(reason, expected=True)
|
||||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
|
||||||
if reason:
|
|
||||||
raise ExtractorError(reason, expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
hl = self._search_regex(
|
hl = get_value('hl')
|
||||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
|
||||||
subtitles_id = None
|
subtitles_id = None
|
||||||
ttsurl = self._search_regex(
|
ttsurl = get_value('ttsurl')
|
||||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
|
||||||
if ttsurl:
|
if ttsurl:
|
||||||
# the video Id for subtitles will be the last value in the ttsurl
|
# the video Id for subtitles will be the last value in the ttsurl
|
||||||
# query string
|
# query string
|
||||||
@ -279,8 +263,8 @@ class GoogleDriveIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||||
'duration': duration,
|
'duration': int_or_none(get_value('length_seconds')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||||
'automatic_captions': self.extract_automatic_captions(
|
'automatic_captions': self.extract_automatic_captions(
|
||||||
|
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
|||||||
|
|
||||||
def _extract_rtmp_video(self, webpage):
|
def _extract_rtmp_video(self, webpage):
|
||||||
# The server URL is hardcoded
|
# The server URL is hardcoded
|
||||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(
|
encoded_id = self._search_regex(
|
||||||
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
|
|||||||
return [{
|
return [{
|
||||||
'format_id': 'http_video',
|
'format_id': 'http_video',
|
||||||
'url': http_video_url,
|
'url': http_video_url,
|
||||||
|
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_http_audio(self, webpage, video_id):
|
def _extract_http_audio(self, webpage, video_id):
|
||||||
fields = self._hidden_inputs(webpage)
|
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||||
http_audio_url = fields.get('filename')
|
http_audio_url = fields.get('filename')
|
||||||
if not http_audio_url:
|
if not http_audio_url:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# base URL is found in the Location header in the response returned by
|
# base URL is found in the Location header in the response returned by
|
||||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||||
|
|
||||||
# audio file seem to be missing some times even if there is a download link
|
# audio file seem to be missing some times even if there is a download link
|
||||||
|
@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
|
|||||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||||
description = xpath_text(doc, 'ABSTRACT')
|
description = xpath_text(doc, 'ABSTRACT')
|
||||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||||
|
|
||||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||||
formats = []
|
formats = []
|
||||||
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'timestamp': createtion_time,
|
'timestamp': creation_time,
|
||||||
}
|
}
|
||||||
|
91
youtube_dl/extractor/lbry.py
Normal file
91
youtube_dl/extractor/lbry.py
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LBRYIE(InfoExtractor):
|
||||||
|
IE_NAME = 'lbry.tv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])'
|
||||||
|
_TESTS = [{
|
||||||
|
# Video
|
||||||
|
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||||
|
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'First day in LBRY? Start HERE!',
|
||||||
|
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||||
|
'timestamp': 1595694354,
|
||||||
|
'upload_date': '20200725',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Audio
|
||||||
|
'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
|
||||||
|
'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
|
||||||
|
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||||
|
'timestamp': 1591312601,
|
||||||
|
'upload_date': '20200604',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _call_api_proxy(self, method, display_id, params):
|
||||||
|
return self._download_json(
|
||||||
|
'https://api.lbry.tv/api/v1/proxy', display_id,
|
||||||
|
headers={'Content-Type': 'application/json-rpc'},
|
||||||
|
data=json.dumps({
|
||||||
|
'method': method,
|
||||||
|
'params': params,
|
||||||
|
}).encode())['result']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url).replace(':', '#')
|
||||||
|
uri = 'lbry://' + display_id
|
||||||
|
result = self._call_api_proxy(
|
||||||
|
'resolve', display_id, {'urls': [uri]})[uri]
|
||||||
|
result_value = result['value']
|
||||||
|
if result_value.get('stream_type') not in ('video', 'audio'):
|
||||||
|
raise ExtractorError('Unsupported URL', expected=True)
|
||||||
|
streaming_url = self._call_api_proxy(
|
||||||
|
'get', display_id, {'uri': uri})['streaming_url']
|
||||||
|
source = result_value.get('source') or {}
|
||||||
|
media = result_value.get('video') or result_value.get('audio') or {}
|
||||||
|
signing_channel = result_value.get('signing_channel') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': result['claim_id'],
|
||||||
|
'title': result_value['title'],
|
||||||
|
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||||
|
'description': result_value.get('description'),
|
||||||
|
'license': result_value.get('license'),
|
||||||
|
'timestamp': int_or_none(result.get('timestamp')),
|
||||||
|
'tags': result_value.get('tags'),
|
||||||
|
'width': int_or_none(media.get('width')),
|
||||||
|
'height': int_or_none(media.get('height')),
|
||||||
|
'duration': int_or_none(media.get('duration')),
|
||||||
|
'channel': signing_channel.get('name'),
|
||||||
|
'channel_id': signing_channel.get('claim_id'),
|
||||||
|
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||||
|
'filesize': int_or_none(source.get('size')),
|
||||||
|
'url': streaming_url,
|
||||||
|
}
|
@ -5,28 +5,26 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
clean_html,
|
||||||
int_or_none,
|
merge_dicts,
|
||||||
parse_duration,
|
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LRTIE(InfoExtractor):
|
class LRTIE(InfoExtractor):
|
||||||
IE_NAME = 'lrt.lt'
|
IE_NAME = 'lrt.lt'
|
||||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
|
||||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
'md5': '85cb2bb530f31d91a9c65b479516ade4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '54391',
|
'id': '2000127261',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Septynios Kauno dienos',
|
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
|
||||||
'description': 'md5:24d84534c7dc76581e59f5689462411a',
|
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
|
||||||
'duration': 1783,
|
'duration': 3035,
|
||||||
'view_count': int,
|
'timestamp': 1604079000,
|
||||||
'like_count': int,
|
'upload_date': '20201030',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# direct mp3 download
|
# direct mp3 download
|
||||||
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_js_var(self, webpage, var_name, default):
|
||||||
|
return self._search_regex(
|
||||||
|
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
|
||||||
|
webpage, var_name.replace('_', ' '), default, group=2)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||||
|
media = self._download_json(self._extract_js_var(
|
||||||
|
webpage, 'media_info_url',
|
||||||
|
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
|
||||||
|
video_id, query={'url': media_url})
|
||||||
|
jw_data = self._parse_jwplayer_data(
|
||||||
|
media['playlist_item'], video_id, base_url=url)
|
||||||
|
|
||||||
formats = []
|
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||||
for _, file_url in re.findall(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
tags = []
|
||||||
ext = determine_ext(file_url)
|
for tag in (media.get('tags') or []):
|
||||||
if ext not in ('m3u8', 'mp3'):
|
tag_name = tag.get('name')
|
||||||
|
if not tag_name:
|
||||||
continue
|
continue
|
||||||
# mp3 served as m3u8 produces stuttered media file
|
tags.append(tag_name)
|
||||||
if ext == 'm3u8' and '.mp3' in file_url:
|
|
||||||
continue
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
fatal=False))
|
|
||||||
elif ext == 'mp3':
|
|
||||||
formats.append({
|
|
||||||
'url': file_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
clean_info = {
|
||||||
description = self._og_search_description(webpage)
|
'description': clean_html(media.get('content')),
|
||||||
duration = parse_duration(self._search_regex(
|
'tags': tags,
|
||||||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
|
|
||||||
webpage, 'duration', default=None, group='duration'))
|
|
||||||
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
|
|
||||||
webpage, 'view count', fatal=False, group='count'))
|
|
||||||
like_count = int_or_none(self._search_regex(
|
|
||||||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
|
|
||||||
webpage, 'like count', fatal=False, group='count'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'like_count': like_count,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||||
|
@ -1,10 +1,16 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import merge_dicts
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MallTVIE(InfoExtractor):
|
class MallTVIE(InfoExtractor):
|
||||||
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
|
|||||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||||
'duration': 216,
|
'duration': 216,
|
||||||
'timestamp': 1538870400,
|
'timestamp': 1538870400,
|
||||||
'upload_date': '20181007',
|
'upload_date': '20181007',
|
||||||
@ -37,20 +43,46 @@ class MallTVIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, display_id, headers=self.geo_verification_headers())
|
url, display_id, headers=self.geo_verification_headers())
|
||||||
|
|
||||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
video = self._parse_json(self._search_regex(
|
||||||
|
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||||
|
webpage, 'video object'), display_id)
|
||||||
|
video_source = video['VideoSource']
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
SOURCE_RE, webpage, 'video id', group='id')
|
r'/([\da-z]+)/index\b', video_source, 'video id')
|
||||||
|
|
||||||
media = self._parse_html5_media_entries(
|
formats = self._extract_m3u8_formats(
|
||||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
|
||||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for s in (video.get('Subtitles') or {}):
|
||||||
|
s_url = s.get('Url')
|
||||||
|
if not s_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||||
|
'url': s_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
entity_counts = video.get('EntityCounts') or {}
|
||||||
|
|
||||||
|
def get_count(k):
|
||||||
|
v = entity_counts.get(k + 's') or {}
|
||||||
|
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
return merge_dicts(media, info, {
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
'title': video.get('Title'),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': clean_html(video.get('Description')),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': video.get('ThumbnailUrl'),
|
||||||
})
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||||
|
'view_count': get_count('View'),
|
||||||
|
'like_count': get_count('Like'),
|
||||||
|
'dislike_count': get_count('Dislike'),
|
||||||
|
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||||
|
'comment_count': get_count('Comment'),
|
||||||
|
}, info)
|
||||||
|
@ -17,9 +17,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class MGTVIE(InfoExtractor):
|
class MGTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
_GEO_COUNTRIES = ['CN']
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://w.mgtv.com/b/301817/3826653.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
|
||||||
try:
|
try:
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
||||||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
|
|||||||
stream_data = self._download_json(
|
stream_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
||||||
'pm2': api_data['atc']['pm2'],
|
'pm2': api_data['atc']['pm2'],
|
||||||
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
stream_domain = stream_data['stream_domain'][0]
|
stream_domain = stream_data['stream_domain'][0]
|
||||||
|
@ -349,6 +349,18 @@ class MTVIE(MTVServicesInfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def extract_child_with_type(parent, t):
|
||||||
|
children = parent['children']
|
||||||
|
return next(c for c in children if c.get('type') == t)
|
||||||
|
|
||||||
|
def _extract_mgid(self, webpage):
|
||||||
|
data = self._parse_json(self._search_regex(
|
||||||
|
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||||
|
main_container = self.extract_child_with_type(data, 'MainContainer')
|
||||||
|
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
||||||
|
return video_player['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
|
|
||||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||||
IE_NAME = 'mtvjapan'
|
IE_NAME = 'mtvjapan'
|
||||||
|
@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
|
|||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
@ -394,8 +393,8 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'window\.__data\s*=\s*({.+});', webpage,
|
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||||
'bootstrap json'), video_id, js_to_json)
|
webpage, 'bootstrap json'), video_id)['props']['initialState']
|
||||||
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
video_data = data['article']['content'][0]['primaryMedia']['video']
|
video_data = data['article']['content'][0]['primaryMedia']['video']
|
||||||
|
@ -81,6 +81,29 @@ class NDRIE(NDRBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# with subtitles
|
||||||
|
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'extra18674',
|
||||||
|
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||||
|
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20201113',
|
||||||
|
'duration': 1749,
|
||||||
|
'subtitles': {
|
||||||
|
'de': [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
'url': r're:^https://www\.ndr\.de.+',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -239,6 +262,20 @@ class NDREmbedBaseIE(InfoExtractor):
|
|||||||
'preference': quality_key(thumbnail.get('quality')),
|
'preference': quality_key(thumbnail.get('quality')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = config.get('tracks')
|
||||||
|
if tracks and isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
|
track_url = urljoin(url, track.get('src'))
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('srclang') or 'de', []).append({
|
||||||
|
'url': track_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -248,6 +285,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
|||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
# mutlimedia, not media title
|
# multimedia, not media title
|
||||||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '533198237',
|
'id': '533198237',
|
||||||
|
@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
|||||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||||
webpage, 'podcast data')
|
webpage, 'podcast data')
|
||||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||||
|
|
||||||
|
|
||||||
|
class NYTimesCookingIE(NYTimesBaseIE):
    # Handles cooking.nytimes.com recipe and guide pages that embed a video.
    _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
        'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
        'info_dict': {
            'id': '100000004756089',
            'ext': 'mov',
            'timestamp': 1479383008,
            'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
            'title': 'Cranberry Tart',
            'upload_date': '20161117',
            'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
        },
    }, {
        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
        'md5': '4b2e8c70530a89b8d905a2b572316eb8',
        'info_dict': {
            'id': '100000003951728',
            'ext': 'mov',
            'timestamp': 1445509539,
            'description': 'Turkey guide',
            'upload_date': '20151022',
            'title': 'Turkey',
        },
    }]

    def _real_extract(self, url):
        """Find the embedded video id on the page and extract that video."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)
        # The numeric id comes from the page's data-video-id attribute and
        # is handed to the base class helper.
        embedded_id = self._search_regex(
            r'data-video-id=["\'](\d+)', page, 'video id')
        return self._extract_video_from_id(embedded_id)
|
||||||
|
@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
|
|||||||
if media_id:
|
if media_id:
|
||||||
return media_id, presumptive_id, upload_date, description
|
return media_id, presumptive_id, upload_date, description
|
||||||
|
|
||||||
# Fronline video embedded via flp
|
# Frontline video embedded via flp
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
|
||||||
if video_id:
|
if video_id:
|
||||||
|
176
youtube_dl/extractor/pinterest.py
Normal file
176
youtube_dl/extractor/pinterest.py
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestBaseIE(InfoExtractor):
    # Shared URL prefix and parsing helpers for the Pinterest extractors.
    _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'

    def _extract_resource(self, webpage, video_id):
        """Return ``resourceResponses`` from the page's ``initial-state`` JSON.

        The JSON object is taken from the ``<script id="initial-state">``
        element of *webpage*; *video_id* is passed on to ``_parse_json``.
        """
        return self._parse_json(
            self._search_regex(
                r'<script[^>]+\bid=["\']initial-state["\'][^>]*>({.+?})</script>',
                webpage, 'application json'),
            video_id)['resourceResponses']

    def _extract_video(self, data, extract_formats=True):
        """Build an info dict from one pin's *data* dict.

        data: pin dict; ``id`` is required, everything else is optional.
        extract_formats: when false, ``formats`` is left empty and not
            sorted, and ``duration`` stays None.

        Returns the info dict, tagged with PinterestIE's extractor key.
        """
        video_id = data['id']

        title = (data.get('title') or data.get('grid_title') or video_id).strip()

        formats = []
        duration = None
        if extract_formats:
            # A pin without a usable video list yields no formats instead of
            # failing here with a bare TypeError/KeyError; the empty list is
            # then left for _sort_formats to deal with.
            video_list = try_get(
                data, lambda x: x['videos']['video_list'], dict) or {}
            for format_id, format_dict in video_list.items():
                if not isinstance(format_dict, dict):
                    continue
                format_url = url_or_none(format_dict.get('url'))
                if not format_url:
                    continue
                # The raw value is divided by 1000 (scale=1000).
                format_duration = float_or_none(
                    format_dict.get('duration'), scale=1000)
                # Keep the first duration found so that a later format
                # lacking one cannot reset the overall duration to None.
                if duration is None:
                    duration = format_duration
                ext = determine_ext(format_url)
                if 'hls' in format_id.lower() or ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'width': int_or_none(format_dict.get('width')),
                        'height': int_or_none(format_dict.get('height')),
                        'duration': format_duration,
                    })
            self._sort_formats(
                formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        description = data.get('description') or data.get('description_html') or data.get('seo_description')
        timestamp = unified_timestamp(data.get('created_at'))

        def _u(field):
            # String field of the pin's closeup_attribution dict, or None.
            return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)

        uploader = _u('full_name')
        uploader_id = _u('id')

        repost_count = int_or_none(data.get('repin_count'))
        comment_count = int_or_none(data.get('comment_count'))
        categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
        tags = data.get('hashtags')

        thumbnails = []
        images = data.get('images')
        if isinstance(images, dict):
            for thumbnail_id, thumbnail in images.items():
                if not isinstance(thumbnail, dict):
                    continue
                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'width': int_or_none(thumbnail.get('width')),
                    'height': int_or_none(thumbnail.get('height')),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'thumbnails': thumbnails,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'categories': categories,
            'tags': tags,
            'formats': formats,
            'extractor_key': PinterestIE.ie_key(),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestIE(PinterestBaseIE):
    # Single pin pages: <pinterest domain>/pin/<numeric id>.
    _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.com/pin/664281013778109217/',
        'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
        'info_dict': {
            'id': '664281013778109217',
            'ext': 'mp4',
            'title': 'Origami',
            'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
            'duration': 57.7,
            'timestamp': 1593073622,
            'upload_date': '20200625',
            'uploader': 'Love origami -I am Dafei',
            'uploader_id': '586523688879454212',
            'repost_count': 50,
            'comment_count': 0,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://co.pinterest.com/pin/824721750502199491/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the pin page and extract the video it describes."""
        pin_id = self._match_id(url)
        page = self._download_webpage(url, pin_id)
        resources = self._extract_resource(page, pin_id)
        # The pin's data is read from the first resource response.
        pin_data = resources[0]['response']['data']
        return self._extract_video(pin_data)
|
||||||
|
|
||||||
|
|
||||||
|
class PinterestCollectionIE(PinterestBaseIE):
    # Board pages: <pinterest domain>/<user>/<board name>.
    _VALID_URL = r'%s/[^/]+/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
        'info_dict': {
            'id': '585890301462791043',
            'title': 'cool diys',
        },
        'playlist_count': 8,
    }]

    @classmethod
    def suitable(cls, url):
        # The board pattern also matches /pin/<id> URLs; leave those to
        # PinterestIE.
        return False if PinterestIE.suitable(url) else super(
            PinterestCollectionIE, cls).suitable(url)

    def _real_extract(self, url):
        """Extract every video pin of a board as a playlist.

        Returns a playlist result whose id is the board id (falling back to
        the name matched from *url*) and whose title is the board title.
        """
        collection_name = self._match_id(url)
        webpage = self._download_webpage(url, collection_name)
        # The board's pins and options are read from the second resource
        # response.
        resource = self._extract_resource(webpage, collection_name)[1]
        entries = []
        for item in resource['response']['data']:
            if not isinstance(item, dict) or item.get('type') != 'pin':
                continue
            if not item.get('id'):
                continue
            # _extract_video() needs item['videos']['video_list'] to be a
            # non-empty dict; a pin lacking it would raise there and abort
            # the whole board, so skip such pins instead.
            if not try_get(item, lambda x: x['videos']['video_list'], dict):
                continue
            # Some pins may not be available anonymously via pin URL, so the
            # video is extracted from the board data right here rather than
            # delegated to PinterestIE through a url_transparent entry.
            entries.append(self._extract_video(item))
        title = try_get(
            resource, lambda x: x['options']['board_title'], compat_str)
        collection_id = try_get(
            resource, lambda x: x['options']['board_id'],
            compat_str) or collection_name
        return self.playlist_result(
            entries, playlist_id=collection_id, playlist_title=title)
|
@ -1,3 +1,4 @@
|
|||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -15,9 +16,9 @@ from ..utils import (
|
|||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
remove_start,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
# This does not imply geo restriction (e.g.
|
# This does not imply geo restriction (e.g.
|
||||||
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
|
||||||
if media_url == 'http://download.rai.it/video_no_available.mp4':
|
if '/video_no_available.mp4' in media_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ext = determine_ext(media_url)
|
ext = determine_ext(media_url)
|
||||||
@ -122,40 +123,20 @@ class RaiBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class RaiPlayIE(RaiBaseIE):
|
class RaiPlayIE(RaiBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
|
|
||||||
'md5': '340aa3b7afb54bfd14a8c11786450d76',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'La Casa Bianca',
|
|
||||||
'alt_title': 'S2016 - Puntata del 23/10/2016',
|
|
||||||
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'uploader': 'Rai 3',
|
|
||||||
'creator': 'Rai 3',
|
|
||||||
'duration': 3278,
|
|
||||||
'timestamp': 1477764300,
|
|
||||||
'upload_date': '20161029',
|
|
||||||
'series': 'La Casa Bianca',
|
|
||||||
'season': '2016',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||||
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
'md5': '8970abf8caf8aef4696e7b1f2adfc696',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Report del 07/04/2014',
|
'title': 'Report del 07/04/2014',
|
||||||
'alt_title': 'S2013/14 - Puntata del 07/04/2014',
|
'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
|
||||||
'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
|
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'Rai 5',
|
'uploader': 'Rai Gulp',
|
||||||
'creator': 'Rai 5',
|
|
||||||
'duration': 6160,
|
'duration': 6160,
|
||||||
'series': 'Report',
|
'series': 'Report',
|
||||||
'season_number': 5,
|
|
||||||
'season': '2013/14',
|
'season': '2013/14',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -167,48 +148,52 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
base, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
url, video_id = mobj.group('url', 'id')
|
|
||||||
|
|
||||||
media = self._download_json(
|
media = self._download_json(
|
||||||
'%s?json' % url, video_id, 'Downloading video JSON')
|
base + '.json', video_id, 'Downloading video JSON')
|
||||||
|
|
||||||
title = media['name']
|
title = media['name']
|
||||||
|
|
||||||
video = media['video']
|
video = media['video']
|
||||||
|
|
||||||
relinker_info = self._extract_relinker_info(video['contentUrl'], video_id)
|
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
|
||||||
self._sort_formats(relinker_info['formats'])
|
self._sort_formats(relinker_info['formats'])
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
if 'images' in media:
|
for _, value in media.get('images', {}).items():
|
||||||
for _, value in media.get('images').items():
|
if value:
|
||||||
if value:
|
thumbnails.append({
|
||||||
thumbnails.append({
|
'url': urljoin(url, value),
|
||||||
'url': value.replace('[RESOLUTION]', '600x400')
|
})
|
||||||
})
|
|
||||||
|
|
||||||
timestamp = unified_timestamp(try_get(
|
date_published = media.get('date_published')
|
||||||
media, lambda x: x['availabilities'][0]['start'], compat_str))
|
time_published = media.get('time_published')
|
||||||
|
if date_published and time_published:
|
||||||
|
date_published += ' ' + time_published
|
||||||
|
|
||||||
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
subtitles = self._extract_subtitles(url, video.get('subtitles'))
|
||||||
|
|
||||||
|
program_info = media.get('program_info') or {}
|
||||||
|
season = media.get('season')
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': video_id,
|
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
|
||||||
|
'display_id': video_id,
|
||||||
'title': self._live_title(title) if relinker_info.get(
|
'title': self._live_title(title) if relinker_info.get(
|
||||||
'is_live') else title,
|
'is_live') else title,
|
||||||
'alt_title': media.get('subtitle'),
|
'alt_title': strip_or_none(media.get('subtitle')),
|
||||||
'description': media.get('description'),
|
'description': media.get('description'),
|
||||||
'uploader': strip_or_none(media.get('channel')),
|
'uploader': strip_or_none(media.get('channel')),
|
||||||
'creator': strip_or_none(media.get('editor')),
|
'creator': strip_or_none(media.get('editor') or None),
|
||||||
'duration': parse_duration(video.get('duration')),
|
'duration': parse_duration(video.get('duration')),
|
||||||
'timestamp': timestamp,
|
'timestamp': unified_timestamp(date_published),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'series': try_get(
|
'series': program_info.get('name'),
|
||||||
media, lambda x: x['isPartOf']['name'], compat_str),
|
'season_number': int_or_none(season),
|
||||||
'season_number': int_or_none(try_get(
|
'season': season if (season and not season.isdigit()) else None,
|
||||||
media, lambda x: x['isPartOf']['numeroStagioni'])),
|
'episode': media.get('episode_title'),
|
||||||
'season': media.get('stagione') or None,
|
'episode_number': int_or_none(media.get('episode')),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,16 +201,16 @@ class RaiPlayIE(RaiBaseIE):
|
|||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayLiveIE(RaiBaseIE):
|
class RaiPlayLiveIE(RaiPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/dirette/rainews24',
|
'url': 'http://www.raiplay.it/dirette/rainews24',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
|
||||||
'display_id': 'rainews24',
|
'display_id': 'rainews24',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'description': 'md5:6eca31500550f9376819f174e5644754',
|
'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
|
||||||
'uploader': 'Rai News 24',
|
'uploader': 'Rai News 24',
|
||||||
'creator': 'Rai News 24',
|
'creator': 'Rai News 24',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
@ -233,58 +218,50 @@ class RaiPlayLiveIE(RaiBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
video_id = self._search_regex(
|
|
||||||
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
|
|
||||||
webpage, 'content id')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': RaiPlayIE.ie_key(),
|
|
||||||
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RaiPlayPlaylistIE(InfoExtractor):
|
class RaiPlayPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nondirloalmiocapo',
|
'id': 'nondirloalmiocapo',
|
||||||
'title': 'Non dirlo al mio capo',
|
'title': 'Non dirlo al mio capo',
|
||||||
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
|
'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 12,
|
'playlist_mincount': 12,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
base, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
program = self._download_json(
|
||||||
|
base + '.json', playlist_id, 'Downloading program JSON')
|
||||||
title = self._html_search_meta(
|
|
||||||
('programma', 'nomeProgramma'), webpage, 'title')
|
|
||||||
description = unescapeHTML(self._html_search_meta(
|
|
||||||
('description', 'og:description'), webpage, 'description'))
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for mobj in re.finditer(
|
for b in (program.get('blocks') or []):
|
||||||
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1',
|
for s in (b.get('sets') or []):
|
||||||
webpage):
|
s_id = s.get('id')
|
||||||
video_url = urljoin(url, mobj.group('path'))
|
if not s_id:
|
||||||
entries.append(self.url_result(
|
continue
|
||||||
video_url, ie=RaiPlayIE.ie_key(),
|
medias = self._download_json(
|
||||||
video_id=RaiPlayIE._match_id(video_url)))
|
'%s/%s.json' % (base, s_id), s_id,
|
||||||
|
'Downloading content set JSON', fatal=False)
|
||||||
|
if not medias:
|
||||||
|
continue
|
||||||
|
for m in (medias.get('items') or []):
|
||||||
|
path_id = m.get('path_id')
|
||||||
|
if not path_id:
|
||||||
|
continue
|
||||||
|
video_url = urljoin(url, path_id)
|
||||||
|
entries.append(self.url_result(
|
||||||
|
video_url, ie=RaiPlayIE.ie_key(),
|
||||||
|
video_id=RaiPlayIE._match_id(video_url)))
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, program.get('name'),
|
||||||
|
try_get(program, lambda x: x['program_info']['description']))
|
||||||
|
|
||||||
|
|
||||||
class RaiIE(RaiBaseIE):
|
class RaiIE(RaiBaseIE):
|
||||||
@ -300,7 +277,8 @@ class RaiIE(RaiBaseIE):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 1758,
|
'duration': 1758,
|
||||||
'upload_date': '20140612',
|
'upload_date': '20140612',
|
||||||
}
|
},
|
||||||
|
'skip': 'This content is available only in Italy',
|
||||||
}, {
|
}, {
|
||||||
# with ContentItem in many metas
|
# with ContentItem in many metas
|
||||||
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
|
||||||
@ -316,7 +294,7 @@ class RaiIE(RaiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# with ContentItem in og:url
|
# with ContentItem in og:url
|
||||||
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
|
||||||
'md5': '11959b4e44fa74de47011b5799490adf',
|
'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -326,18 +304,6 @@ class RaiIE(RaiBaseIE):
|
|||||||
'duration': 2214,
|
'duration': 2214,
|
||||||
'upload_date': '20161103',
|
'upload_date': '20161103',
|
||||||
}
|
}
|
||||||
}, {
|
|
||||||
# drawMediaRaiTV(...)
|
|
||||||
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
|
|
||||||
'md5': '2dd727e61114e1ee9c47f0da6914e178',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Il pacco',
|
|
||||||
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'upload_date': '20141221',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# initEdizione('ContentItem-...'
|
# initEdizione('ContentItem-...'
|
||||||
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
|
||||||
@ -349,17 +315,6 @@ class RaiIE(RaiBaseIE):
|
|||||||
'upload_date': '20170401',
|
'upload_date': '20170401',
|
||||||
},
|
},
|
||||||
'skip': 'Changes daily',
|
'skip': 'Changes daily',
|
||||||
}, {
|
|
||||||
# HDS live stream with only relinker URL
|
|
||||||
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'EuroNews',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# HLS live stream with ContentItem in og:url
|
# HLS live stream with ContentItem in og:url
|
||||||
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
|
||||||
@ -469,7 +424,7 @@ class RaiIE(RaiBaseIE):
|
|||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
relinker_url = self._search_regex(
|
relinker_url = self._proto_relative_url(self._search_regex(
|
||||||
r'''(?x)
|
r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
var\s+videoURL|
|
var\s+videoURL|
|
||||||
@ -481,7 +436,7 @@ class RaiIE(RaiBaseIE):
|
|||||||
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
|
||||||
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
|
||||||
''',
|
''',
|
||||||
webpage, 'relinker URL', group='url')
|
webpage, 'relinker URL', group='url'))
|
||||||
|
|
||||||
relinker_info = self._extract_relinker_info(
|
relinker_info = self._extract_relinker_info(
|
||||||
urljoin(url, relinker_url), video_id)
|
urljoin(url, relinker_url), video_id)
|
||||||
|
67
youtube_dl/extractor/rumble.py
Normal file
67
youtube_dl/extractor/rumble.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RumbleEmbedIE(InfoExtractor):
    """Extractor for rumble.com embed player URLs."""

    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the embedJS video JSON and build the info dict from it."""
        video_id = self._match_id(url)
        metadata = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        # A missing title is fatal (KeyError), everything else is optional.
        title = metadata['title']

        formats = []
        renditions = metadata.get('ua') or {}
        for height, rendition in renditions.items():
            # Slots 0 and 1 are read as media URLs; slot index + 2 is read as
            # the matching metadata entry carrying the bitrate.
            for index in (0, 1):
                media_url = try_get(rendition, lambda x: x[index], compat_str)
                if not media_url:
                    continue
                ext = determine_ext(media_url)
                fmt = {
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                    'url': media_url,
                }
                bitrate = try_get(rendition, lambda x: x[index + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        uploader = metadata.get('author') or {}

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': metadata.get('i'),
            'timestamp': parse_iso8601(metadata.get('pubDate')),
            'channel': uploader.get('name'),
            'channel_url': uploader.get('url'),
            'duration': int_or_none(metadata.get('duration')),
        }
|
@ -1,9 +1,15 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
urlencode_postdata,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ServusIE(InfoExtractor):
|
class ServusIE(InfoExtractor):
|
||||||
@ -12,20 +18,29 @@ class ServusIE(InfoExtractor):
|
|||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?:
|
||||||
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
||||||
servustv\.com/videos
|
(?:servustv|pm-wissen)\.com/videos
|
||||||
)
|
)
|
||||||
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# new URL schema
|
# new URL schema
|
||||||
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
||||||
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
'md5': '60474d4c21f3eb148838f215c37f02b9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AA-1T6VBU5PW1W12',
|
'id': 'AA-1T6VBU5PW1W12',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Die Grünen aus Sicht des Volkes',
|
'title': 'Die Grünen aus Sicht des Volkes',
|
||||||
|
'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
|
||||||
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 62.442,
|
||||||
|
'timestamp': 1605193976,
|
||||||
|
'upload_date': '20201112',
|
||||||
|
'series': 'Talk im Hangar-7',
|
||||||
|
'season': 'Season 9',
|
||||||
|
'season_number': 9,
|
||||||
|
'episode': 'Episode 31 - September 14',
|
||||||
|
'episode_number': 31,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# old URL schema
|
# old URL schema
|
||||||
@ -40,30 +55,94 @@ class ServusIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url).upper()
|
video_id = self._match_id(url).upper()
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._search_regex(
|
token = self._download_json(
|
||||||
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
'https://auth.redbullmediahouse.com/token', video_id,
|
||||||
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
|
'Downloading token', data=urlencode_postdata({
|
||||||
webpage, 'title', default=None,
|
'grant_type': 'client_credentials',
|
||||||
group='title') or self._og_search_title(webpage)
|
}), headers={
|
||||||
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
|
'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
|
||||||
description = self._og_search_description(webpage)
|
})
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
access_token = token['access_token']
|
||||||
|
token_type = token.get('token_type', 'Bearer')
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
video = self._download_json(
|
||||||
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
|
'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
|
||||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
|
video_id, 'Downloading video JSON', headers={
|
||||||
|
'Authorization': '%s %s' % (token_type, access_token),
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
thumbnail = None
|
||||||
|
for resource in video['resources']:
|
||||||
|
if not isinstance(resource, dict):
|
||||||
|
continue
|
||||||
|
format_url = url_or_none(resource.get('url'))
|
||||||
|
if not format_url:
|
||||||
|
continue
|
||||||
|
extension = resource.get('extension')
|
||||||
|
type_ = resource.get('type')
|
||||||
|
if extension == 'jpg' or type_ == 'reference_keyframe':
|
||||||
|
thumbnail = format_url
|
||||||
|
continue
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if type_ == 'dash' or ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
elif type_ == 'hls' or ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif extension == 'mp4' or ext == 'mp4':
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': type_,
|
||||||
|
'width': int_or_none(resource.get('width')),
|
||||||
|
'height': int_or_none(resource.get('height')),
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
attrs = {}
|
||||||
|
for attribute in video['attributes']:
|
||||||
|
if not isinstance(attribute, dict):
|
||||||
|
continue
|
||||||
|
key = attribute.get('fieldKey')
|
||||||
|
value = attribute.get('fieldValue')
|
||||||
|
if not key or not value:
|
||||||
|
continue
|
||||||
|
attrs[key] = value
|
||||||
|
|
||||||
|
title = attrs.get('title_stv') or video_id
|
||||||
|
alt_title = attrs.get('title')
|
||||||
|
description = attrs.get('long_description') or attrs.get('short_description')
|
||||||
|
series = attrs.get('label')
|
||||||
|
season = attrs.get('season')
|
||||||
|
episode = attrs.get('chapter')
|
||||||
|
duration = float_or_none(attrs.get('duration'), scale=1000)
|
||||||
|
season_number = int_or_none(self._search_regex(
|
||||||
|
r'Season (\d+)', season or '', 'season number', default=None))
|
||||||
|
episode_number = int_or_none(self._search_regex(
|
||||||
|
r'Episode (\d+)', episode or '', 'episode number', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'alt_title': alt_title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
'duration': duration,
|
||||||
|
'timestamp': unified_timestamp(video.get('lastPublished')),
|
||||||
|
'series': series,
|
||||||
|
'season': season,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': episode_number,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
239
youtube_dl/extractor/skyit.py
Normal file
239
youtube_dl/extractor/skyit.py
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItPlayerIE(InfoExtractor):
    """Extractor for player.sky.it player pages; base class for the Sky Italia extractors."""

    IE_NAME = 'player.sky.it'
    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
    _GEO_BYPASS = False
    # Value substituted for the "domain" parameter of _PLAYER_TMPL; subclasses override it.
    _DOMAIN = 'sky'
    _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
    # http://static.sky.it/static/skyplayer/conf.json
    # Tokens looked up by the "domain" query parameter of the player URL.
    _TOKEN_MAP = {
        'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
        'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
        'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
        'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
        'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
        'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
        'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
        'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
        'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
    }

    def _player_url_result(self, video_id):
        """Return a url_result pointing at the player page for video_id."""
        player_url = self._PLAYER_TMPL % (video_id, self._DOMAIN)
        return self.url_result(player_url, SkyItPlayerIE.ie_key(), video_id)

    def _parse_video(self, video, video_id):
        """Build the info dict from a video or livestream JSON object."""
        title = video['title']
        is_live = video.get('type') == 'live'
        # Live and on-demand payloads use different key names.
        if is_live:
            url_prefix, geo_key = 'streaming', 'geoblock'
        else:
            url_prefix, geo_key = 'hls', 'geob'
        hls_url = video.get(url_prefix + '_url')
        if not hls_url and video.get(geo_key):
            self.raise_geo_restricted(countries=['IT'])

        if is_live:
            formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
        else:
            formats = self._extract_akamai_formats(
                hls_url, video_id, {'http': 'videoplatform.sky.it'})
        self._sort_formats(formats)

        # Prefer the numeric duration and fall back to the textual one.
        duration = int_or_none(video.get('duration_sec'))
        if not duration:
            duration = parse_duration(video.get('duration'))

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
            'description': video.get('short_desc') or None,
            'timestamp': unified_timestamp(video.get('create_date')),
            'duration': duration,
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Download the video data JSON for the player URL and parse it."""
        video_id = self._match_id(url)
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        domain = url_params.get('domain', [None])[0]
        # Falls back to the 'sky' token when the domain is absent or unknown.
        token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
        video = self._download_json(
            'https://apid.sky.it/vdp/v1/getVideoData',
            video_id, query={
                'caller': 'sky',
                'id': video_id,
                'token': token
            }, headers=self.geo_verification_headers())
        return self._parse_video(video, video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItVideoIE(SkyItPlayerIE):
    """Extractor for video pages on masterchef/video/xfactor.sky.it."""

    IE_NAME = 'video.sky.it'
    _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        }
    }, {
        'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
        'only_matching': True,
    }, {
        'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the numeric id from the URL over to the player extractor."""
        return self._player_url_result(self._match_id(url))
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItVideoLiveIE(SkyItPlayerIE):
    """Extractor for live stream pages under video.sky.it/diretta/."""

    IE_NAME = 'video.sky.it:live'
    _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://video.sky.it/diretta/tg24',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the asset id from the page's __NEXT_DATA__ JSON and fetch its livestream data."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        next_data_json = self._search_regex(
            r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
            webpage, 'next data')
        next_data = self._parse_json(next_data_json, display_id)
        # Any missing level in this path raises, as in a single chained lookup.
        content = next_data['props']['initialState']['livePage']['content']
        asset_id = compat_str(content['asset_id'])
        livestream = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream',
            asset_id, query={'id': asset_id})
        return self._parse_video(livestream, asset_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItIE(SkyItPlayerIE):
    """Extractor for dated article pages on sport.sky.it and tg24.sky.it."""

    IE_NAME = 'sky.it'
    _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
        'info_dict': {
            'id': '631201',
            'ext': 'mp4',
            'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
            'upload_date': '20201121',
            'timestamp': 1605995753,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        },
    }]
    # Pattern whose first group captures the numeric video id in the page HTML.
    _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'

    def _real_extract(self, url):
        """Find the video id in the article page and delegate to the player extractor."""
        page_slug = self._match_id(url)
        html = self._download_webpage(url, page_slug)
        video_id = self._search_regex(self._VIDEO_ID_REGEX, html, 'video id')
        return self._player_url_result(video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItAcademyIE(SkyItIE):
    """Extractor for dated pages on skyacademy.it; overrides only class-level settings."""

    IE_NAME = 'skyacademy.it'
    _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # Player domain and the page pattern that captures the video id.
    _DOMAIN = 'skyacademy'
    _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
    _TESTS = [{
        'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
        'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
        'info_dict': {
            'id': '523458',
            'ext': 'mp4',
            'title': 'Sky Academy "The Best CineCamp 2019"',
            'timestamp': 1562843784,
            'upload_date': '20190711',
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class SkyItArteIE(SkyItIE):
    """Extractor for arte.sky.it video pages; overrides only class-level settings."""

    IE_NAME = 'arte.sky.it'
    _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    # Player domain; the id is taken from the embedded player iframe's src.
    _DOMAIN = 'skyarte'
    _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
    _TESTS = [{
        'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
        'md5': '515aee97b87d7a018b6c80727d3e7e17',
        'info_dict': {
            'id': '627926',
            'ext': 'mp4',
            'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
            'upload_date': '20201106',
            'timestamp': 1604664493,
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class CieloTVItIE(SkyItIE):
    """Extractor for cielotv.it video pages; overrides only class-level settings."""

    IE_NAME = 'cielotv.it'
    _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    # Player domain and the page pattern that captures the video id.
    _DOMAIN = 'cielo'
    _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
    _TESTS = [{
        'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
        'md5': 'c4deed77552ba901c2a0d9258320304b',
        'info_dict': {
            'id': '499240',
            'ext': 'mp4',
            'title': 'Il lunedì è sempre un dramma',
            'upload_date': '20190329',
            'timestamp': 1553862178,
        }
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class TV8ItIE(SkyItVideoIE):
    """Extractor for tv8.it showvideo pages; the numeric id is taken from the URL."""

    IE_NAME = 'tv8.it'
    _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    # Player domain used when building the player URL.
    _DOMAIN = 'mtv8'
    _TESTS = [{
        'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
        'md5': '9ab906a3f75ea342ed928442f9dabd21',
        'info_dict': {
            'id': '630529',
            'ext': 'mp4',
            'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
            'timestamp': 1605721374,
            'upload_date': '20201118',
        }
    }]
|
@ -558,7 +558,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
|
|||||||
|
|
||||||
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
|
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
|
||||||
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
def _extract_playlist(self, base_url, playlist_id, playlist_title):
|
||||||
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200.
|
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
|
||||||
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
|
||||||
COMMON_QUERY = {
|
COMMON_QUERY = {
|
||||||
'limit': 200,
|
'limit': 200,
|
||||||
|
@ -1,159 +1,54 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .nexx import (
|
from .jwplatform import JWPlatformIE
|
||||||
NexxIE,
|
|
||||||
NexxEmbedIE,
|
|
||||||
)
|
|
||||||
from .spiegeltv import SpiegeltvIE
|
|
||||||
from ..compat import compat_urlparse
|
|
||||||
from ..utils import (
|
|
||||||
parse_duration,
|
|
||||||
strip_or_none,
|
|
||||||
unified_timestamp,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class SpiegelIE(InfoExtractor):
|
class SpiegelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
|
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
|
||||||
'md5': 'b57399839d055fccfeb9a0455c439868',
|
'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '563747',
|
'id': 'II0BUyxY',
|
||||||
|
'display_id': '1259285',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv',
|
'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
|
||||||
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
'description': 'md5:8029d8310232196eb235d27575a8b9f4',
|
||||||
'duration': 49,
|
'duration': 48.0,
|
||||||
'upload_date': '20130311',
|
'upload_date': '20130311',
|
||||||
'timestamp': 1362994320,
|
'timestamp': 1362997920,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
|
||||||
'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '580988',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
|
|
||||||
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
|
|
||||||
'duration': 983,
|
|
||||||
'upload_date': '20131115',
|
|
||||||
'timestamp': 1384546642,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
|
|
||||||
'md5': '97b91083a672d72976faa8433430afb9',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '601883',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
|
|
||||||
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
|
|
||||||
'upload_date': '20140904',
|
|
||||||
'timestamp': 1409834160,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# nexx video
|
'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
|
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}, {
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
|
|
||||||
handle = self._request_webpage(metadata_url, video_id)
|
|
||||||
|
|
||||||
# 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
|
|
||||||
if SpiegeltvIE.suitable(handle.geturl()):
|
|
||||||
return self.url_result(handle.geturl(), 'Spiegeltv')
|
|
||||||
|
|
||||||
video_data = self._parse_json(self._webpage_read_content(
|
|
||||||
handle, metadata_url, video_id), video_id)
|
|
||||||
title = video_data['title']
|
|
||||||
nexx_id = video_data['nexxOmniaId']
|
|
||||||
domain_id = video_data.get('nexxOmniaDomain') or '748'
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
'url': 'nexx:%s:%s' % (domain_id, nexx_id),
|
|
||||||
'title': title,
|
|
||||||
'description': strip_or_none(video_data.get('teaser')),
|
|
||||||
'duration': parse_duration(video_data.get('duration')),
|
|
||||||
'timestamp': unified_timestamp(video_data.get('datum')),
|
|
||||||
'ie_key': NexxIE.ie_key(),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SpiegelArticleIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
|
|
||||||
IE_NAME = 'Spiegel:Article'
|
|
||||||
IE_DESC = 'Articles on spiegel.de'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
|
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '1516455',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
|
|
||||||
'description': 're:^Patrick Kämnitz gehört.{100,}',
|
|
||||||
'upload_date': '20140825',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
|
|
||||||
'info_dict': {
|
|
||||||
|
|
||||||
},
|
|
||||||
'playlist_count': 6,
|
|
||||||
}, {
|
|
||||||
# Nexx iFrame embed
|
|
||||||
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '161464',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Nervenkitzel Achterbahn',
|
|
||||||
'alt_title': 'Karussellbauer in Deutschland',
|
|
||||||
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
|
|
||||||
'release_year': 2005,
|
|
||||||
'creator': 'SPIEGEL TV',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 2761,
|
|
||||||
'timestamp': 1394021479,
|
|
||||||
'upload_date': '20140305',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'format': 'bestvideo',
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
media_id = self._html_search_regex(
|
||||||
# Single video on top of the page
|
r'("|["\'])mediaId\1\s*:\s*("|["\'])(?P<id>(?:(?!\2).)+)\2',
|
||||||
video_link = self._search_regex(
|
webpage, 'media id', group='id')
|
||||||
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage,
|
return {
|
||||||
'video page URL', default=None)
|
'_type': 'url_transparent',
|
||||||
if video_link:
|
'id': video_id,
|
||||||
video_url = compat_urlparse.urljoin(
|
'display_id': video_id,
|
||||||
self.http_scheme() + '//spiegel.de/', video_link)
|
'url': 'jwplatform:%s' % media_id,
|
||||||
return self.url_result(video_url)
|
'title': self._og_search_title(webpage, default=None),
|
||||||
|
'ie_key': JWPlatformIE.ie_key(),
|
||||||
# Multiple embedded videos
|
}
|
||||||
embeds = re.findall(
|
|
||||||
r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
|
|
||||||
webpage)
|
|
||||||
entries = [
|
|
||||||
self.url_result(compat_urlparse.urljoin(
|
|
||||||
self.http_scheme() + '//spiegel.de/', embed_path))
|
|
||||||
for embed_path in embeds]
|
|
||||||
if embeds:
|
|
||||||
return self.playlist_result(entries)
|
|
||||||
|
|
||||||
return self.playlist_from_matches(
|
|
||||||
NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())
|
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .nexx import NexxIE
|
|
||||||
|
|
||||||
|
|
||||||
class SpiegeltvIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
|
|
||||||
'only_matching': True,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
return self.url_result(
|
|
||||||
'https://api.nexx.cloud/v3/748/videos/byid/%s'
|
|
||||||
% self._match_id(url), ie=NexxIE.ie_key())
|
|
@ -9,6 +9,7 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor):
|
|||||||
'format_id': player_type,
|
'format_id': player_type,
|
||||||
'url': vurl,
|
'url': vurl,
|
||||||
})
|
})
|
||||||
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
|
rights = try_get(video_info, lambda x: x['rights'], dict) or {}
|
||||||
|
if not formats and rights.get('geoBlockedSweden'):
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
'This video is only available in Sweden',
|
'This video is only available in Sweden',
|
||||||
countries=self._GEO_COUNTRIES)
|
countries=self._GEO_COUNTRIES)
|
||||||
@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor):
|
|||||||
episode = video_info.get('episodeTitle')
|
episode = video_info.get('episodeTitle')
|
||||||
episode_number = int_or_none(video_info.get('episodeNumber'))
|
episode_number = int_or_none(video_info.get('episodeNumber'))
|
||||||
|
|
||||||
|
timestamp = unified_timestamp(rights.get('validFrom'))
|
||||||
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
|
||||||
age_limit = None
|
age_limit = None
|
||||||
adult = dict_get(
|
adult = dict_get(
|
||||||
@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor):
|
|||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
'timestamp': timestamp,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'series': series,
|
'series': series,
|
||||||
'season_number': season_number,
|
'season_number': season_number,
|
||||||
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
|
|||||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
svt:(?P<svt_id>[^/?#&]+)|
|
(?:
|
||||||
|
svt:|
|
||||||
|
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
|
||||||
|
)
|
||||||
|
(?P<svt_id>[^/?#&]+)|
|
||||||
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
|
'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
|
||||||
'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
|
'md5': '2382036fd6f8c994856c323fe51c426e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5996901',
|
'id': 'jNwpV9P',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Flygplan till Haile Selassie',
|
'title': 'Det här är himlen',
|
||||||
'duration': 3527,
|
'timestamp': 1586044800,
|
||||||
'thumbnail': r're:^https?://.*[\.-]jpg$',
|
'upload_date': '20200405',
|
||||||
|
'duration': 3515,
|
||||||
|
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'subtitles': {
|
'subtitles': {
|
||||||
'sv': [{
|
'sv': [{
|
||||||
'ext': 'wsrt',
|
'ext': 'vtt',
|
||||||
}]
|
}]
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
|
||||||
|
# init segments that are smaller
|
||||||
|
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# geo restricted to Sweden
|
# geo restricted to Sweden
|
||||||
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
|
||||||
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'svt:14278044',
|
'url': 'svt:14278044',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'svt:eWv5MLX',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _adjust_title(self, info):
|
def _adjust_title(self, info):
|
||||||
@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE):
|
|||||||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
return self._extract_by_video_id(svt_id, webpage)
|
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||||
|
info_dict['thumbnail'] = thumbnail
|
||||||
|
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
class SVTSeriesIE(SVTPlayBaseIE):
|
class SVTSeriesIE(SVTPlayBaseIE):
|
||||||
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor):
|
|||||||
# return self._extract_via_api(kind, video_id)
|
# return self._extract_via_api(kind, video_id)
|
||||||
|
|
||||||
# JSON api does not provide some audio formats (e.g. ogg) thus
|
# JSON api does not provide some audio formats (e.g. ogg) thus
|
||||||
# extractiong audio via webpage
|
# extracting audio via webpage
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
|||||||
if m:
|
if m:
|
||||||
return [m.group('url')]
|
return [m.group('url')]
|
||||||
|
|
||||||
# Are whitesapces ignored in URLs?
|
# Are whitespaces ignored in URLs?
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/12044
|
# https://github.com/ytdl-org/youtube-dl/issues/12044
|
||||||
matches = re.findall(
|
matches = re.findall(
|
||||||
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
|
||||||
|
@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE):
|
|||||||
content_id = xpath_text(video_data, 'contentId') or video_id
|
content_id = xpath_text(video_data, 'contentId') or video_id
|
||||||
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
# rtmp_src = xpath_text(video_data, 'akamai/src')
|
||||||
# if rtmp_src:
|
# if rtmp_src:
|
||||||
# splited_rtmp_src = rtmp_src.split(',')
|
# split_rtmp_src = rtmp_src.split(',')
|
||||||
# if len(splited_rtmp_src) == 2:
|
# if len(split_rtmp_src) == 2:
|
||||||
# rtmp_src = splited_rtmp_src[1]
|
# rtmp_src = split_rtmp_src[1]
|
||||||
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
# aifp = xpath_text(video_data, 'akamai/aifp', default='')
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
|
@ -2,7 +2,11 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import unified_timestamp
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class URPlayIE(InfoExtractor):
|
class URPlayIE(InfoExtractor):
|
||||||
@ -15,8 +19,8 @@ class URPlayIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
|
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
|
||||||
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
|
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
|
||||||
'timestamp': 1513512768,
|
'timestamp': 1513292400,
|
||||||
'upload_date': '20171217',
|
'upload_date': '20171214',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||||
@ -25,7 +29,7 @@ class URPlayIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Tripp, Trapp, Träd : Sovkudde',
|
'title': 'Tripp, Trapp, Träd : Sovkudde',
|
||||||
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
|
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
|
||||||
'timestamp': 1440093600,
|
'timestamp': 1440086400,
|
||||||
'upload_date': '20150820',
|
'upload_date': '20150820',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
@ -35,37 +39,58 @@ class URPlayIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
urplayer_data = self._parse_json(self._search_regex(
|
urplayer_data = self._parse_json(self._html_search_regex(
|
||||||
r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id)
|
r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"',
|
||||||
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect']
|
webpage, 'urplayer data'), video_id)['currentProduct']
|
||||||
|
episode = urplayer_data['title']
|
||||||
|
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
||||||
|
host = self._download_json(
|
||||||
|
'http://streaming-loadbalancer.ur.se/loadbalancer.json',
|
||||||
|
video_id)['redirect']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)):
|
for k, v in raw_streaming_info.items():
|
||||||
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr)
|
if not (k in ('sd', 'hd') and isinstance(v, dict)):
|
||||||
|
continue
|
||||||
|
file_http = v.get('location')
|
||||||
if file_http:
|
if file_http:
|
||||||
formats.extend(self._extract_wowza_formats(
|
formats.extend(self._extract_wowza_formats(
|
||||||
'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp']))
|
'http://%s/%splaylist.m3u8' % (host, file_http),
|
||||||
|
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
image = urplayer_data.get('image') or {}
|
||||||
for subtitle in urplayer_data.get('subtitles', []):
|
thumbnails = []
|
||||||
subtitle_url = subtitle.get('file')
|
for k, v in image.items():
|
||||||
kind = subtitle.get('kind')
|
t = {
|
||||||
if not subtitle_url or (kind and kind != 'captions'):
|
'id': k,
|
||||||
continue
|
'url': v,
|
||||||
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({
|
}
|
||||||
'url': subtitle_url,
|
wh = k.split('x')
|
||||||
})
|
if len(wh) == 2:
|
||||||
|
t.update({
|
||||||
|
'width': int_or_none(wh[0]),
|
||||||
|
'height': int_or_none(wh[1]),
|
||||||
|
})
|
||||||
|
thumbnails.append(t)
|
||||||
|
|
||||||
|
series = urplayer_data.get('series') or {}
|
||||||
|
series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': urplayer_data['title'],
|
'title': '%s : %s' % (series_title, episode) if series_title else episode,
|
||||||
'description': self._og_search_description(webpage),
|
'description': urplayer_data.get('description'),
|
||||||
'thumbnail': urplayer_data.get('image'),
|
'thumbnails': thumbnails,
|
||||||
'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')),
|
'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
|
||||||
'series': urplayer_data.get('series_title'),
|
'series': series_title,
|
||||||
'subtitles': subtitles,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'duration': int_or_none(urplayer_data.get('duration')),
|
||||||
|
'categories': urplayer_data.get('categories'),
|
||||||
|
'tags': urplayer_data.get('keywords'),
|
||||||
|
'season': series.get('label'),
|
||||||
|
'episode': episode,
|
||||||
|
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||||
}
|
}
|
||||||
|
@ -1,74 +1,24 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .nbc import NBCIE
|
||||||
from ..utils import (
|
|
||||||
NO_DEFAULT,
|
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class USANetworkIE(AdobePassIE):
|
class USANetworkIE(NBCIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/[^/]+/video/[^/]+/(?P<id>\d+))'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity',
|
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
|
||||||
'md5': '33c0d2ba381571b414024440d08d57fd',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3086229',
|
'id': '4185302',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'HPE Cybersecurity',
|
'title': 'Intelligence (Trailer)',
|
||||||
'description': 'The more we digitize our world, the more vulnerable we are.',
|
'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
|
||||||
'upload_date': '20160818',
|
'upload_date': '20200715',
|
||||||
'timestamp': 1471535460,
|
'timestamp': 1594785600,
|
||||||
'uploader': 'NBCU-USA',
|
'uploader': 'NBCU-MPAT',
|
||||||
},
|
},
|
||||||
}
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
def _real_extract(self, url):
|
'skip_download': True,
|
||||||
display_id = self._match_id(url)
|
},
|
||||||
webpage = self._download_webpage(url, display_id)
|
}]
|
||||||
|
|
||||||
def _x(name, default=NO_DEFAULT):
|
|
||||||
return self._search_regex(
|
|
||||||
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
|
||||||
webpage, name, default=default, group='value')
|
|
||||||
|
|
||||||
video_id = _x('mpx-guid')
|
|
||||||
title = _x('episode-title')
|
|
||||||
mpx_account_id = _x('mpx-account-id', '2304992029')
|
|
||||||
|
|
||||||
query = {
|
|
||||||
'mbr': 'true',
|
|
||||||
}
|
|
||||||
if _x('is-full-episode', None) == '1':
|
|
||||||
query['manifest'] = 'm3u'
|
|
||||||
|
|
||||||
if _x('is-entitlement', None) == '1':
|
|
||||||
adobe_pass = {}
|
|
||||||
drupal_settings = self._search_regex(
|
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
|
||||||
webpage, 'drupal settings', fatal=False)
|
|
||||||
if drupal_settings:
|
|
||||||
drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
|
|
||||||
if drupal_settings:
|
|
||||||
adobe_pass = drupal_settings.get('adobePass', {})
|
|
||||||
resource = self._get_mvpd_resource(
|
|
||||||
adobe_pass.get('adobePassResourceId', 'usa'),
|
|
||||||
title, video_id, _x('episode-rating', 'TV-14'))
|
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
|
||||||
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
|
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
|
||||||
info.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': smuggle_url(update_url_query(
|
|
||||||
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
|
|
||||||
query), {'force_smil_url': True}),
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'series': _x('show-title', None),
|
|
||||||
'episode': title,
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import itertools
|
import itertools
|
||||||
@ -9,6 +10,10 @@ import re
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -165,19 +170,20 @@ class VikiIE(VikiBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# episode
|
# episode
|
||||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||||
'md5': '5fa476a902e902783ac7a4d615cdbc7a',
|
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '44699v',
|
'id': '44699v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Boys Over Flowers - Episode 1',
|
'title': 'Boys Over Flowers - Episode 1',
|
||||||
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
|
||||||
'duration': 4204,
|
'duration': 4172,
|
||||||
'timestamp': 1270496524,
|
'timestamp': 1270496524,
|
||||||
'upload_date': '20100405',
|
'upload_date': '20100405',
|
||||||
'uploader': 'group8',
|
'uploader': 'group8',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||||
}, {
|
}, {
|
||||||
# youtube external
|
# youtube external
|
||||||
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
|
||||||
@ -194,14 +200,15 @@ class VikiIE(VikiBaseIE):
|
|||||||
'uploader_id': 'ad14065n',
|
'uploader_id': 'ad14065n',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 13,
|
'age_limit': 13,
|
||||||
}
|
},
|
||||||
|
'skip': 'Page not found!',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.viki.com/player/44699v',
|
'url': 'http://www.viki.com/player/44699v',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# non-English description
|
# non-English description
|
||||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||||
'md5': '1713ae35df5a521b31f6dc40730e7c9c',
|
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '158036v',
|
'id': '158036v',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -217,8 +224,11 @@ class VikiIE(VikiBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._call_api(
|
resp = self._download_json(
|
||||||
'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
|
'https://www.viki.com/api/videos/' + video_id,
|
||||||
|
video_id, 'Downloading video JSON',
|
||||||
|
headers={'x-viki-app-ver': '4.0.57'})
|
||||||
|
video = resp['video']
|
||||||
|
|
||||||
self._check_errors(video)
|
self._check_errors(video)
|
||||||
|
|
||||||
@ -265,57 +275,74 @@ class VikiIE(VikiBaseIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
streams = self._call_api(
|
|
||||||
'videos/%s/streams.json' % video_id, video_id,
|
|
||||||
'Downloading video streams JSON')
|
|
||||||
|
|
||||||
if 'external' in streams:
|
|
||||||
result.update({
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': streams['external']['url'],
|
|
||||||
})
|
|
||||||
return result
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_dict in streams.items():
|
|
||||||
height = int_or_none(self._search_regex(
|
def add_format(format_id, format_dict, protocol='http'):
|
||||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
# rtmps URLs does not seem to work
|
||||||
for protocol, format_dict in stream_dict.items():
|
if protocol == 'rtmps':
|
||||||
# rtmps URLs does not seem to work
|
return
|
||||||
if protocol == 'rtmps':
|
format_url = format_dict.get('url')
|
||||||
continue
|
if not format_url:
|
||||||
format_url = format_dict['url']
|
return
|
||||||
if format_id == 'm3u8':
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
stream = qs.get('stream', [None])[0]
|
||||||
format_url, video_id, 'mp4',
|
if stream:
|
||||||
entry_protocol='m3u8_native',
|
format_url = base64.b64decode(stream).decode()
|
||||||
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
if format_id in ('m3u8', 'hls'):
|
||||||
# Despite CODECS metadata in m3u8 all video-only formats
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
# are actually video+audio
|
format_url, video_id, 'mp4',
|
||||||
for f in m3u8_formats:
|
entry_protocol='m3u8_native',
|
||||||
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
m3u8_id='m3u8-%s' % protocol, fatal=False)
|
||||||
f['acodec'] = None
|
# Despite CODECS metadata in m3u8 all video-only formats
|
||||||
formats.extend(m3u8_formats)
|
# are actually video+audio
|
||||||
elif format_url.startswith('rtmp'):
|
for f in m3u8_formats:
|
||||||
mobj = re.search(
|
if '_drm/index_' in f['url']:
|
||||||
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
|
||||||
format_url)
|
|
||||||
if not mobj:
|
|
||||||
continue
|
continue
|
||||||
formats.append({
|
if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
|
||||||
'format_id': 'rtmp-%s' % format_id,
|
f['acodec'] = None
|
||||||
'ext': 'flv',
|
formats.append(f)
|
||||||
'url': mobj.group('url'),
|
elif format_id in ('mpd', 'dash'):
|
||||||
'play_path': mobj.group('playpath'),
|
formats.extend(self._extract_mpd_formats(
|
||||||
'app': mobj.group('app'),
|
format_url, video_id, 'mpd-%s' % protocol, fatal=False))
|
||||||
'page_url': url,
|
elif format_url.startswith('rtmp'):
|
||||||
})
|
mobj = re.search(
|
||||||
else:
|
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
|
||||||
formats.append({
|
format_url)
|
||||||
'url': format_url,
|
if not mobj:
|
||||||
'format_id': '%s-%s' % (format_id, protocol),
|
return
|
||||||
'height': height,
|
formats.append({
|
||||||
})
|
'format_id': 'rtmp-%s' % format_id,
|
||||||
|
'ext': 'flv',
|
||||||
|
'url': mobj.group('url'),
|
||||||
|
'play_path': mobj.group('playpath'),
|
||||||
|
'app': mobj.group('app'),
|
||||||
|
'page_url': url,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%s-%s' % (format_id, protocol),
|
||||||
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'^(\d+)[pP]$', format_id, 'height', default=None)),
|
||||||
|
})
|
||||||
|
|
||||||
|
for format_id, format_dict in (resp.get('streams') or {}).items():
|
||||||
|
add_format(format_id, format_dict)
|
||||||
|
if not formats:
|
||||||
|
streams = self._call_api(
|
||||||
|
'videos/%s/streams.json' % video_id, video_id,
|
||||||
|
'Downloading video streams JSON')
|
||||||
|
|
||||||
|
if 'external' in streams:
|
||||||
|
result.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': streams['external']['url'],
|
||||||
|
})
|
||||||
|
return result
|
||||||
|
|
||||||
|
for format_id, stream_dict in streams.items():
|
||||||
|
for protocol, format_dict in stream_dict.items():
|
||||||
|
add_format(format_id, format_dict, protocol)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
result['formats'] = formats
|
result['formats'] = formats
|
||||||
|
@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
|||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 100
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
|
def _fetch_page(self, album_id, authorization, hashed_pass, page):
|
||||||
api_page = page + 1
|
api_page = page + 1
|
||||||
query = {
|
query = {
|
||||||
'fields': 'link,uri',
|
'fields': 'link,uri',
|
||||||
@ -934,7 +934,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
|||||||
videos = self._download_json(
|
videos = self._download_json(
|
||||||
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
'https://api.vimeo.com/albums/%s/videos' % album_id,
|
||||||
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
album_id, 'Downloading page %d' % api_page, query=query, headers={
|
||||||
'Authorization': 'jwt ' + authorizaion,
|
'Authorization': 'jwt ' + authorization,
|
||||||
})['data']
|
})['data']
|
||||||
for video in videos:
|
for video in videos:
|
||||||
link = video.get('link')
|
link = video.get('link')
|
||||||
@ -946,10 +946,13 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
album_id = self._match_id(url)
|
album_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, album_id)
|
viewer = self._download_json(
|
||||||
viewer = self._parse_json(self._search_regex(
|
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||||
r'bootstrap_data\s*=\s*({.+?})</script>',
|
if not viewer:
|
||||||
webpage, 'bootstrap data'), album_id)['viewer']
|
webpage = self._download_webpage(url, album_id)
|
||||||
|
viewer = self._parse_json(self._search_regex(
|
||||||
|
r'bootstrap_data\s*=\s*({.+?})</script>',
|
||||||
|
webpage, 'bootstrap data'), album_id)['viewer']
|
||||||
jwt = viewer['jwt']
|
jwt = viewer['jwt']
|
||||||
album = self._download_json(
|
album = self._download_json(
|
||||||
'https://api.vimeo.com/albums/' + album_id,
|
'https://api.vimeo.com/albums/' + album_id,
|
||||||
|
@ -1,25 +1,30 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from .naver import NaverBaseIE
|
from .naver import NaverBaseIE
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
remove_start,
|
|
||||||
try_get,
|
try_get,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class VLiveIE(NaverBaseIE):
|
class VLiveBaseIE(NaverBaseIE):
|
||||||
|
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
|
||||||
|
|
||||||
|
|
||||||
|
class VLiveIE(VLiveBaseIE):
|
||||||
IE_NAME = 'vlive'
|
IE_NAME = 'vlive'
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
|
||||||
_NETRC_MACHINE = 'vlive'
|
_NETRC_MACHINE = 'vlive'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.vlive.tv/video/1326',
|
'url': 'http://www.vlive.tv/video/1326',
|
||||||
@ -27,7 +32,7 @@ class VLiveIE(NaverBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1326',
|
'id': '1326',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "[V LIVE] Girl's Day's Broadcast",
|
'title': "Girl's Day's Broadcast",
|
||||||
'creator': "Girl's Day",
|
'creator': "Girl's Day",
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'uploader_id': 'muploader_a',
|
'uploader_id': 'muploader_a',
|
||||||
@ -37,7 +42,7 @@ class VLiveIE(NaverBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '16937',
|
'id': '16937',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '[V LIVE] 첸백시 걍방',
|
'title': '첸백시 걍방',
|
||||||
'creator': 'EXO',
|
'creator': 'EXO',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'subtitles': 'mincount:12',
|
'subtitles': 'mincount:12',
|
||||||
@ -58,12 +63,11 @@ class VLiveIE(NaverBaseIE):
|
|||||||
'subtitles': 'mincount:10',
|
'subtitles': 'mincount:10',
|
||||||
},
|
},
|
||||||
'skip': 'This video is only available for CH+ subscribers',
|
'skip': 'This video is only available for CH+ subscribers',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.vlive.tv/embed/1326',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -95,173 +99,122 @@ class VLiveIE(NaverBaseIE):
|
|||||||
if not is_logged_in():
|
if not is_logged_in():
|
||||||
raise ExtractorError('Unable to log in', expected=True)
|
raise ExtractorError('Unable to log in', expected=True)
|
||||||
|
|
||||||
|
def _call_api(self, path_template, video_id, fields=None):
|
||||||
|
query = {'appId': self._APP_ID}
|
||||||
|
if fields:
|
||||||
|
query['fields'] = fields
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
|
||||||
|
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
|
||||||
|
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
try:
|
||||||
'https://www.vlive.tv/video/%s' % video_id, video_id)
|
post = self._call_api(
|
||||||
|
'post/v1.0/officialVideoPost-%s', video_id,
|
||||||
|
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
|
||||||
|
raise
|
||||||
|
|
||||||
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
|
video = post['officialVideo']
|
||||||
VIDEO_PARAMS_FIELD = 'video params'
|
|
||||||
|
|
||||||
params = self._parse_json(self._search_regex(
|
def get_common_fields():
|
||||||
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
|
channel = post.get('channel') or {}
|
||||||
transform_source=lambda s: '[' + s + ']', fatal=False)
|
return {
|
||||||
|
'title': video.get('title'),
|
||||||
|
'creator': post.get('author', {}).get('nickname'),
|
||||||
|
'channel': channel.get('channelName'),
|
||||||
|
'channel_id': channel.get('channelCode'),
|
||||||
|
'duration': int_or_none(video.get('playTime')),
|
||||||
|
'view_count': int_or_none(video.get('playCount')),
|
||||||
|
'like_count': int_or_none(video.get('likeCount')),
|
||||||
|
'comment_count': int_or_none(video.get('commentCount')),
|
||||||
|
}
|
||||||
|
|
||||||
if not params or len(params) < 7:
|
video_type = video.get('type')
|
||||||
params = self._search_regex(
|
if video_type == 'VOD':
|
||||||
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
|
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
|
||||||
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
|
vod_id = video['vodId']
|
||||||
|
return merge_dicts(
|
||||||
status, long_video_id, key = params[2], params[5], params[6]
|
get_common_fields(),
|
||||||
status = remove_start(status, 'PRODUCT_')
|
self._extract_video_info(video_id, vod_id, inkey))
|
||||||
|
elif video_type == 'LIVE':
|
||||||
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
|
status = video.get('status')
|
||||||
return self._live(video_id, webpage)
|
if status == 'ON_AIR':
|
||||||
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
|
stream_url = self._call_api(
|
||||||
return self._replay(video_id, webpage, long_video_id, key)
|
'old/v3/live/%s/playInfo',
|
||||||
|
video_id)['result']['adaptiveStreamUrl']
|
||||||
if status == 'LIVE_END':
|
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
|
||||||
raise ExtractorError('Uploading for replay. Please wait...',
|
info = get_common_fields()
|
||||||
expected=True)
|
info.update({
|
||||||
elif status == 'COMING_SOON':
|
'title': self._live_title(video['title']),
|
||||||
raise ExtractorError('Coming soon!', expected=True)
|
'id': video_id,
|
||||||
elif status == 'CANCELED':
|
'formats': formats,
|
||||||
raise ExtractorError('We are sorry, '
|
'is_live': True,
|
||||||
'but the live broadcast has been canceled.',
|
})
|
||||||
expected=True)
|
return info
|
||||||
elif status == 'ONLY_APP':
|
elif status == 'ENDED':
|
||||||
raise ExtractorError('Unsupported video type', expected=True)
|
raise ExtractorError(
|
||||||
else:
|
'Uploading for replay. Please wait...', expected=True)
|
||||||
raise ExtractorError('Unknown status %s' % status)
|
elif status == 'RESERVED':
|
||||||
|
raise ExtractorError('Coming soon!', expected=True)
|
||||||
def _get_common_fields(self, webpage):
|
elif video.get('exposeStatus') == 'CANCEL':
|
||||||
title = self._og_search_title(webpage)
|
raise ExtractorError(
|
||||||
creator = self._html_search_regex(
|
'We are sorry, but the live broadcast has been canceled.',
|
||||||
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
|
expected=True)
|
||||||
webpage, 'creator', fatal=False)
|
else:
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
raise ExtractorError('Unknown status ' + status)
|
||||||
return {
|
|
||||||
'title': title,
|
|
||||||
'creator': creator,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _live(self, video_id, webpage):
|
|
||||||
init_page = self._download_init_page(video_id)
|
|
||||||
|
|
||||||
live_params = self._search_regex(
|
|
||||||
r'"liveStreamInfo"\s*:\s*(".*"),',
|
|
||||||
init_page, 'live stream info')
|
|
||||||
live_params = self._parse_json(live_params, video_id)
|
|
||||||
live_params = self._parse_json(live_params, video_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for vid in live_params.get('resolutions', []):
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
vid['cdnUrl'], video_id, 'mp4',
|
|
||||||
m3u8_id=vid.get('name'),
|
|
||||||
fatal=False, live=True))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = self._get_common_fields(webpage)
|
|
||||||
info.update({
|
|
||||||
'title': self._live_title(info['title']),
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'is_live': True,
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
||||||
def _replay(self, video_id, webpage, long_video_id, key):
|
|
||||||
if '' in (long_video_id, key):
|
|
||||||
init_page = self._download_init_page(video_id)
|
|
||||||
video_info = self._parse_json(self._search_regex(
|
|
||||||
(r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
|
|
||||||
r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
|
|
||||||
video_id)
|
|
||||||
if video_info.get('status') == 'NEED_CHANNEL_PLUS':
|
|
||||||
self.raise_login_required(
|
|
||||||
'This video is only available for CH+ subscribers')
|
|
||||||
long_video_id, key = video_info['vid'], video_info['inkey']
|
|
||||||
|
|
||||||
return merge_dicts(
|
|
||||||
self._get_common_fields(webpage),
|
|
||||||
self._extract_video_info(video_id, long_video_id, key))
|
|
||||||
|
|
||||||
def _download_init_page(self, video_id):
|
|
||||||
return self._download_webpage(
|
|
||||||
'https://www.vlive.tv/video/init/view',
|
|
||||||
video_id, note='Downloading live webpage',
|
|
||||||
data=urlencode_postdata({'videoSeq': video_id}),
|
|
||||||
headers={
|
|
||||||
'Referer': 'https://www.vlive.tv/video/%s' % video_id,
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded'
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class VLiveChannelIE(InfoExtractor):
|
class VLiveChannelIE(VLiveBaseIE):
|
||||||
IE_NAME = 'vlive:channel'
|
IE_NAME = 'vlive:channel'
|
||||||
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
|
_VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://channels.vlive.tv/FCD4B',
|
'url': 'http://channels.vlive.tv/FCD4B',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'FCD4B',
|
'id': 'FCD4B',
|
||||||
'title': 'MAMAMOO',
|
'title': 'MAMAMOO',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 110
|
'playlist_mincount': 110
|
||||||
}
|
}, {
|
||||||
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
|
'url': 'https://www.vlive.tv/channel/FCD4B',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _call_api(self, path, channel_key_suffix, channel_value, note, query):
|
||||||
|
q = {
|
||||||
|
'app_id': self._APP_ID,
|
||||||
|
'channel' + channel_key_suffix: channel_value,
|
||||||
|
}
|
||||||
|
q.update(query)
|
||||||
|
return self._download_json(
|
||||||
|
'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
|
||||||
|
channel_value, note='Downloading ' + note, query=q)['result']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_code = self._match_id(url)
|
channel_code = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
channel_seq = self._call_api(
|
||||||
'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
|
'decodeChannelCode', 'Code', channel_code,
|
||||||
|
'decode channel code', {})['channelSeq']
|
||||||
|
|
||||||
app_id = None
|
|
||||||
|
|
||||||
app_js_url = self._search_regex(
|
|
||||||
r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
|
|
||||||
webpage, 'app js', default=None, group='url')
|
|
||||||
|
|
||||||
if app_js_url:
|
|
||||||
app_js = self._download_webpage(
|
|
||||||
app_js_url, channel_code, 'Downloading app JS', fatal=False)
|
|
||||||
if app_js:
|
|
||||||
app_id = self._search_regex(
|
|
||||||
r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
|
|
||||||
app_js, 'app id', default=None)
|
|
||||||
|
|
||||||
app_id = app_id or self._APP_ID
|
|
||||||
|
|
||||||
channel_info = self._download_json(
|
|
||||||
'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
|
|
||||||
channel_code, note='Downloading decode channel code',
|
|
||||||
query={
|
|
||||||
'app_id': app_id,
|
|
||||||
'channelCode': channel_code,
|
|
||||||
'_': int(time.time())
|
|
||||||
})
|
|
||||||
|
|
||||||
channel_seq = channel_info['result']['channelSeq']
|
|
||||||
channel_name = None
|
channel_name = None
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
video_list = self._download_json(
|
video_list = self._call_api(
|
||||||
'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
|
'getChannelVideoList', 'Seq', channel_seq,
|
||||||
channel_code, note='Downloading channel list page #%d' % page_num,
|
'channel list page #%d' % page_num, {
|
||||||
query={
|
|
||||||
'app_id': app_id,
|
|
||||||
'channelSeq': channel_seq,
|
|
||||||
# Large values of maxNumOfRows (~300 or above) may cause
|
# Large values of maxNumOfRows (~300 or above) may cause
|
||||||
# empty responses (see [1]), e.g. this happens for [2] that
|
# empty responses (see [1]), e.g. this happens for [2] that
|
||||||
# has more than 300 videos.
|
# has more than 300 videos.
|
||||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/13830
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/13830
|
||||||
# 2. http://channels.vlive.tv/EDBF.
|
# 2. http://channels.vlive.tv/EDBF.
|
||||||
'maxNumOfRows': 100,
|
'maxNumOfRows': 100,
|
||||||
'_': int(time.time()),
|
|
||||||
'pageNo': page_num
|
'pageNo': page_num
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@ -269,11 +222,11 @@ class VLiveChannelIE(InfoExtractor):
|
|||||||
if not channel_name:
|
if not channel_name:
|
||||||
channel_name = try_get(
|
channel_name = try_get(
|
||||||
video_list,
|
video_list,
|
||||||
lambda x: x['result']['channelInfo']['channelName'],
|
lambda x: x['channelInfo']['channelName'],
|
||||||
compat_str)
|
compat_str)
|
||||||
|
|
||||||
videos = try_get(
|
videos = try_get(
|
||||||
video_list, lambda x: x['result']['videoList'], list)
|
video_list, lambda x: x['videoList'], list)
|
||||||
if not videos:
|
if not videos:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -289,79 +242,3 @@ class VLiveChannelIE(InfoExtractor):
|
|||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, channel_code, channel_name)
|
entries, channel_code, channel_name)
|
||||||
|
|
||||||
|
|
||||||
class VLivePlaylistIE(InfoExtractor):
|
|
||||||
IE_NAME = 'vlive:playlist'
|
|
||||||
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
|
|
||||||
_VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
|
|
||||||
_TESTS = [{
|
|
||||||
# regular working playlist
|
|
||||||
'url': 'https://www.vlive.tv/video/117956/playlist/117963',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '117963',
|
|
||||||
'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
|
|
||||||
},
|
|
||||||
'playlist_mincount': 10
|
|
||||||
}, {
|
|
||||||
# playlist with no playlistVideoSeqs
|
|
||||||
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '22867',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '[V LIVE] Valentine Day Message from MINA',
|
|
||||||
'creator': 'TWICE',
|
|
||||||
'view_count': int
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _build_video_result(self, video_id, message):
|
|
||||||
self.to_screen(message)
|
|
||||||
return self.url_result(
|
|
||||||
self._VIDEO_URL_TEMPLATE % video_id,
|
|
||||||
ie=VLiveIE.ie_key(), video_id=video_id)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id, playlist_id = mobj.group('video_id', 'id')
|
|
||||||
|
|
||||||
if self._downloader.params.get('noplaylist'):
|
|
||||||
return self._build_video_result(
|
|
||||||
video_id,
|
|
||||||
'Downloading just video %s because of --no-playlist'
|
|
||||||
% video_id)
|
|
||||||
|
|
||||||
self.to_screen(
|
|
||||||
'Downloading playlist %s - add --no-playlist to just download video'
|
|
||||||
% playlist_id)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'http://www.vlive.tv/video/%s/playlist/%s'
|
|
||||||
% (video_id, playlist_id), playlist_id)
|
|
||||||
|
|
||||||
raw_item_ids = self._search_regex(
|
|
||||||
r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
|
|
||||||
'playlist video seqs', default=None, fatal=False)
|
|
||||||
|
|
||||||
if not raw_item_ids:
|
|
||||||
return self._build_video_result(
|
|
||||||
video_id,
|
|
||||||
'Downloading just video %s because no playlist was found'
|
|
||||||
% video_id)
|
|
||||||
|
|
||||||
item_ids = self._parse_json(raw_item_ids, playlist_id)
|
|
||||||
|
|
||||||
entries = [
|
|
||||||
self.url_result(
|
|
||||||
self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
|
|
||||||
video_id=compat_str(item_id))
|
|
||||||
for item_id in item_ids]
|
|
||||||
|
|
||||||
playlist_name = self._html_search_regex(
|
|
||||||
r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
|
|
||||||
webpage, 'playlist title', fatal=False)
|
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, playlist_name)
|
|
||||||
|
@ -54,17 +54,17 @@ class XiamiBaseIE(InfoExtractor):
|
|||||||
def _decrypt(origin):
|
def _decrypt(origin):
|
||||||
n = int(origin[0])
|
n = int(origin[0])
|
||||||
origin = origin[1:]
|
origin = origin[1:]
|
||||||
short_lenth = len(origin) // n
|
short_length = len(origin) // n
|
||||||
long_num = len(origin) - short_lenth * n
|
long_num = len(origin) - short_length * n
|
||||||
l = tuple()
|
l = tuple()
|
||||||
for i in range(0, n):
|
for i in range(0, n):
|
||||||
length = short_lenth
|
length = short_length
|
||||||
if i < long_num:
|
if i < long_num:
|
||||||
length += 1
|
length += 1
|
||||||
l += (origin[0:length], )
|
l += (origin[0:length], )
|
||||||
origin = origin[length:]
|
origin = origin[length:]
|
||||||
ans = ''
|
ans = ''
|
||||||
for i in range(0, short_lenth + 1):
|
for i in range(0, short_length + 1):
|
||||||
for j in range(0, n):
|
for j in range(0, n):
|
||||||
if len(l[j]) > i:
|
if len(l[j]) > i:
|
||||||
ans += l[j][i]
|
ans += l[j][i]
|
||||||
|
@ -90,7 +90,7 @@ class XTubeIE(InfoExtractor):
|
|||||||
title, thumbnail, duration = [None] * 3
|
title, thumbnail, duration = [None] * 3
|
||||||
|
|
||||||
config = self._parse_json(self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
|
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
|
||||||
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
||||||
if config:
|
if config:
|
||||||
config = config.get('mainRoll')
|
config = config.get('mainRoll')
|
||||||
|
@ -29,7 +29,6 @@ class YouPornIE(InfoExtractor):
|
|||||||
'upload_date': '20101217',
|
'upload_date': '20101217',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
@ -48,7 +47,6 @@ class YouPornIE(InfoExtractor):
|
|||||||
'upload_date': '20110418',
|
'upload_date': '20110418',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
@ -156,7 +154,8 @@ class YouPornIE(InfoExtractor):
|
|||||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
upload_date = unified_strdate(self._html_search_regex(
|
||||||
[r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
[r'UPLOADED:\s*<span>([^<]+)',
|
||||||
|
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
|
||||||
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
|
||||||
webpage, 'upload date', fatal=False))
|
webpage, 'upload date', fatal=False))
|
||||||
|
|
||||||
@ -171,7 +170,7 @@ class YouPornIE(InfoExtractor):
|
|||||||
webpage, 'view count', fatal=False, group='count'))
|
webpage, 'view count', fatal=False, group='count'))
|
||||||
comment_count = str_to_int(self._search_regex(
|
comment_count = str_to_int(self._search_regex(
|
||||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||||
webpage, 'comment count', fatal=False))
|
webpage, 'comment count', default=None))
|
||||||
|
|
||||||
def extract_tag_box(regex, title):
|
def extract_tag_box(regex, title):
|
||||||
tag_box = self._search_regex(regex, webpage, title, default=None)
|
tag_box = self._search_regex(regex, webpage, title, default=None)
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -2458,7 +2458,7 @@ class XAttrMetadataError(YoutubeDLError):
|
|||||||
|
|
||||||
# Parsing code and msg
|
# Parsing code and msg
|
||||||
if (self.code in (errno.ENOSPC, errno.EDQUOT)
|
if (self.code in (errno.ENOSPC, errno.EDQUOT)
|
||||||
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
|
or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
|
||||||
self.reason = 'NO_SPACE'
|
self.reason = 'NO_SPACE'
|
||||||
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
|
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
|
||||||
self.reason = 'VALUE_TOO_LONG'
|
self.reason = 'VALUE_TOO_LONG'
|
||||||
@ -4078,7 +4078,7 @@ def js_to_json(code):
|
|||||||
v = m.group(0)
|
v = m.group(0)
|
||||||
if v in ('true', 'false', 'null'):
|
if v in ('true', 'false', 'null'):
|
||||||
return v
|
return v
|
||||||
elif v.startswith('/*') or v.startswith('//') or v == ',':
|
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
if v[0] in ("'", '"'):
|
if v[0] in ("'", '"'):
|
||||||
@ -4103,7 +4103,8 @@ def js_to_json(code):
|
|||||||
{comment}|,(?={skip}[\]}}])|
|
{comment}|,(?={skip}[\]}}])|
|
||||||
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
||||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
||||||
[0-9]+(?={skip}:)
|
[0-9]+(?={skip}:)|
|
||||||
|
!+
|
||||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
||||||
|
|
||||||
|
|
||||||
@ -4206,10 +4207,10 @@ def parse_codecs(codecs_str):
|
|||||||
# http://tools.ietf.org/html/rfc6381
|
# http://tools.ietf.org/html/rfc6381
|
||||||
if not codecs_str:
|
if not codecs_str:
|
||||||
return {}
|
return {}
|
||||||
splited_codecs = list(filter(None, map(
|
split_codecs = list(filter(None, map(
|
||||||
lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
|
lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
|
||||||
vcodec, acodec = None, None
|
vcodec, acodec = None, None
|
||||||
for full_codec in splited_codecs:
|
for full_codec in split_codecs:
|
||||||
codec = full_codec.split('.')[0]
|
codec = full_codec.split('.')[0]
|
||||||
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
|
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
|
||||||
if not vcodec:
|
if not vcodec:
|
||||||
@ -4220,10 +4221,10 @@ def parse_codecs(codecs_str):
|
|||||||
else:
|
else:
|
||||||
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
|
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
|
||||||
if not vcodec and not acodec:
|
if not vcodec and not acodec:
|
||||||
if len(splited_codecs) == 2:
|
if len(split_codecs) == 2:
|
||||||
return {
|
return {
|
||||||
'vcodec': splited_codecs[0],
|
'vcodec': split_codecs[0],
|
||||||
'acodec': splited_codecs[1],
|
'acodec': split_codecs[1],
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
return {
|
return {
|
||||||
@ -5462,7 +5463,7 @@ def encode_base_n(num, n, table=None):
|
|||||||
|
|
||||||
def decode_packed_codes(code):
|
def decode_packed_codes(code):
|
||||||
mobj = re.search(PACKED_CODES_RE, code)
|
mobj = re.search(PACKED_CODES_RE, code)
|
||||||
obfucasted_code, base, count, symbols = mobj.groups()
|
obfuscated_code, base, count, symbols = mobj.groups()
|
||||||
base = int(base)
|
base = int(base)
|
||||||
count = int(count)
|
count = int(count)
|
||||||
symbols = symbols.split('|')
|
symbols = symbols.split('|')
|
||||||
@ -5475,7 +5476,7 @@ def decode_packed_codes(code):
|
|||||||
|
|
||||||
return re.sub(
|
return re.sub(
|
||||||
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
|
||||||
obfucasted_code)
|
obfuscated_code)
|
||||||
|
|
||||||
|
|
||||||
def caesar(s, alphabet, shift):
|
def caesar(s, alphabet, shift):
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2020.09.20'
|
__version__ = '2020.11.21.1'
|
||||||
|
Loading…
Reference in New Issue
Block a user