Merge branch 'master' of https://github.com/ytdl-org/youtube-dl into mkvthumbnail

This commit is contained in:
MrDoritos 2020-11-22 20:55:58 -05:00
commit aa9a04bab9
65 changed files with 2928 additions and 1932 deletions

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support - [ ] I'm reporting a broken site support
- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones - [ ] I've searched the bugtracker for similar issues including closed ones
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.09.20 [debug] youtube-dl version 2020.11.21.1
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,7 +19,7 @@ labels: 'site-support-request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that the site you are requesting is not dedicated to copyright infringement; see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs must not violate any copyrights. - Make sure that the site you are requesting is not dedicated to copyright infringement; see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs must not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a new site support request - [ ] I'm reporting a new site support request
- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of the provided URLs violate any copyrights - [ ] I've checked that none of the provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones - [ ] I've searched the bugtracker for similar site support requests including closed ones

View File

@ -18,13 +18,13 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a site feature request - [ ] I'm reporting a site feature request
- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've searched the bugtracker for similar site feature requests including closed ones

View File

@ -18,7 +18,7 @@ title: ''
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. - Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape. - Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
--> -->
- [ ] I'm reporting a broken site support issue - [ ] I'm reporting a broken site support issue
- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
- [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones - [ ] I've searched the bugtracker for similar bug reports including closed ones
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2020.09.20 [debug] youtube-dl version 2020.11.21.1
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -19,13 +19,13 @@ labels: 'request'
<!-- <!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl: Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.09.20. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED. - First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.21.1. If it's not, see https://yt-dl.org/update on how to update. Issues with an outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates. - Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x]) - Finally, put x into all relevant boxes (like this [x])
--> -->
- [ ] I'm reporting a feature request - [ ] I'm reporting a feature request
- [ ] I've verified that I'm running youtube-dl version **2020.09.20** - [ ] I've verified that I'm running youtube-dl version **2020.11.21.1**
- [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've searched the bugtracker for similar feature requests including closed ones

149
ChangeLog
View File

@ -1,3 +1,118 @@
version 2020.11.21.1
Core
* [downloader/http] Fix crash during urlopen caused by missing reason
of URLError
* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries
of url_transparent (#27064)
Extractors
+ [svtplay] Add support for svt.se/barnkanalen (#24817)
+ [svt] Extract timestamp (#27130)
* [svtplay] Improve thumbnail extraction (#27130)
* [youtube] Fix error reason extraction (#27081)
* [youtube] Fix like and dislike count extraction (#25977)
+ [youtube:tab] Add support for current video and fix lives extraction (#27126)
* [infoq] Fix format extraction (#25984)
* [francetv] Update to fix thumbnail URL issue (#27120)
* [youtube] Improve yt initial data extraction (#27093)
+ [discoverynetworks] Add support for new TLC/DMAX URLs (#27100)
* [rai] Fix protocol relative relinker URLs (#22766)
* [rai] Fix unavailable video format detection
* [rai] Improve extraction
* [rai] Fix extraction (#27077)
* [viki] Improve format extraction
* [viki] Fix stream extraction from MPD (#27092)
* [googledrive] Fix format extraction (#26979)
+ [amara] Add support for amara.org (#20618)
* [vimeo:album] Fix extraction (#27079)
* [mtv] Fix mgid extraction (#26841)
version 2020.11.19
Core
* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400,
#24151, #25617, #25618, #25586, #26068, #27072)
Extractors
* [youporn] Fix upload date extraction
* [youporn] Make comment count optional (#26986)
* [arte] Rework extractors
* Reimplement embed and playlist extractors to delegate to the single
entrypoint artetv extractor
* Improve embeds detection (#27057)
+ [arte] Extract m3u8 formats (#27061)
* [mgtv] Fix format extraction (#26415)
+ [lbry] Add support for odysee.com (#26806)
* [francetv] Improve info extraction
+ [francetv] Add fallback video URL extraction (#27047)
version 2020.11.18
Extractors
* [spiegel] Fix extraction (#24206, #24767)
* [youtube] Improve extraction
+ Add support for --no-playlist (#27009)
* Improve playlist and mix extraction (#26390, #26509, #26534, #27011)
+ Extract playlist uploader data
* [youtube:tab] Fix view count extraction (#27051)
* [malltv] Fix extraction (#27035)
+ [bandcamp] Extract playlist description (#22684)
* [urplay] Fix extraction (#26828)
* [youtube:tab] Fix playlist title extraction (#27015)
* [youtube] Fix chapters extraction (#26005)
version 2020.11.17
Core
* [utils] Skip ! prefixed code in js_to_json
Extractors
* [youtube:tab] Fix extraction with cookies provided (#27005)
* [lrt] Fix extraction with empty tags (#20264)
+ [ndr:embed:base] Extract subtitles (#25447, #26106)
+ [servus] Add support for pm-wissen.com (#25869)
* [servus] Fix extraction (#26872, #26967, #26983, #27000)
* [xtube] Fix extraction (#26996)
* [lrt] Fix extraction
+ [lbry] Add support for lbry.tv
+ [condenast] Extract subtitles
* [condenast] Fix extraction
* [bandcamp] Fix extraction (#26681, #26684)
* [rai] Fix RaiPlay extraction (#26064, #26096)
* [vlive] Fix extraction
* [usanetwork] Fix extraction
* [nbc] Fix NBCNews/Today/MSNBC extraction
* [cnbc] Fix extraction
version 2020.11.12
Extractors
* [youtube] Rework extractors
version 2020.11.01
Core
* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
* [downloader/http] Properly handle missing message in SSLError (#26646)
* [downloader/http] Fix access to not yet opened stream in retry
Extractors
* [youtube] Fix JS player URL extraction
* [ytsearch] Fix extraction (#26920)
* [afreecatv] Fix typo (#26970)
* [23video] Relax URL regular expression (#26870)
+ [ustream] Add support for video.ibm.com (#26894)
* [iqiyi] Fix typo (#26884)
+ [expressen] Add support for di.se (#26670)
* [iprima] Improve video id extraction (#26507, #26494)
version 2020.09.20 version 2020.09.20
Core Core
@ -283,7 +398,7 @@ Extractors
+ Add support for more domains + Add support for more domains
* [svt] Fix series extraction (#22297) * [svt] Fix series extraction (#22297)
* [svt] Fix article extraction (#22897, #22919) * [svt] Fix article extraction (#22897, #22919)
* [soundcloud] Imporve private playlist/set tracks extraction (#3707) * [soundcloud] Improve private playlist/set tracks extraction (#3707)
version 2020.01.24 version 2020.01.24
@ -409,7 +524,7 @@ Extractors
* [abcotvs] Relax URL regular expression and improve metadata extraction * [abcotvs] Relax URL regular expression and improve metadata extraction
(#18014) (#18014)
* [channel9] Reduce response size * [channel9] Reduce response size
* [adobetv] Improve extaction * [adobetv] Improve extraction
* Use OnDemandPagedList for list extractors * Use OnDemandPagedList for list extractors
* Reduce show extraction requests * Reduce show extraction requests
* Extract original video format and subtitles * Extract original video format and subtitles
@ -434,7 +549,7 @@ Extractors
* [dailymotion] Improve extraction * [dailymotion] Improve extraction
* Extract http formats included in m3u8 manifest * Extract http formats included in m3u8 manifest
* Fix user extraction (#3553, #21415) * Fix user extraction (#3553, #21415)
+ Add suport for User Authentication (#11491) + Add support for User Authentication (#11491)
* Fix password protected videos extraction (#23176) * Fix password protected videos extraction (#23176)
* Respect age limit option and family filter cookie value (#18437) * Respect age limit option and family filter cookie value (#18437)
* Handle video url playlist query param * Handle video url playlist query param
@ -519,7 +634,7 @@ Extractors
- [go90] Remove extractor - [go90] Remove extractor
* [kakao] Remove raw request * [kakao] Remove raw request
+ [kakao] Extract format total bitrate + [kakao] Extract format total bitrate
* [daum] Fix VOD and Clip extracton (#15015) * [daum] Fix VOD and Clip extraction (#15015)
* [kakao] Improve extraction * [kakao] Improve extraction
+ Add support for embed URLs + Add support for embed URLs
+ Add support for Kakao Legacy vid based embed URLs + Add support for Kakao Legacy vid based embed URLs
@ -563,7 +678,7 @@ Extractors
* Improve format extraction (#22123) * Improve format extraction (#22123)
+ Extract uploader_id and uploader_url (#21916) + Extract uploader_id and uploader_url (#21916)
+ Extract all known thumbnails (#19071, #20659) + Extract all known thumbnails (#19071, #20659)
* Fix extration for private playlists (#20976) * Fix extraction for private playlists (#20976)
+ Add support for playlist embeds (#20976) + Add support for playlist embeds (#20976)
* Skip preview formats (#22806) * Skip preview formats (#22806)
* [dplay] Improve extraction * [dplay] Improve extraction
@ -1038,7 +1153,7 @@ Extractors
* [hbo] Fix extraction and extract subtitles (#14629, #13709) * [hbo] Fix extraction and extract subtitles (#14629, #13709)
* [youtube] Extract srv[1-3] subtitle formats (#20566) * [youtube] Extract srv[1-3] subtitle formats (#20566)
* [adultswim] Fix extraction (#18025) * [adultswim] Fix extraction (#18025)
* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339) * [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
* [adn] Fix subtitle compatibility with ffmpeg * [adn] Fix subtitle compatibility with ffmpeg
* [adn] Fix extraction and add support for positioning styles (#20549) * [adn] Fix extraction and add support for positioning styles (#20549)
* [vk] Use unique video id (#17848) * [vk] Use unique video id (#17848)
@ -1450,7 +1565,7 @@ version 2018.11.18
Extractors Extractors
+ [wwe] Extract subtitles + [wwe] Extract subtitles
+ [wwe] Add support for playlistst (#14781) + [wwe] Add support for playlists (#14781)
+ [wwe] Add support for wwe.com (#14781, #17450) + [wwe] Add support for wwe.com (#14781, #17450)
* [vk] Detect geo restriction (#17767) * [vk] Detect geo restriction (#17767)
* [openload] Use original host during extraction (#18211) * [openload] Use original host during extraction (#18211)
@ -2483,7 +2598,7 @@ Extractors
* [youku] Update ccode (#14872) * [youku] Update ccode (#14872)
* [mnet] Fix format extraction (#14883) * [mnet] Fix format extraction (#14883)
+ [xiami] Add Referer header to API request + [xiami] Add Referer header to API request
* [mtv] Correct scc extention in extracted subtitles (#13730) * [mtv] Correct scc extension in extracted subtitles (#13730)
* [vvvvid] Fix extraction for kenc videos (#13406) * [vvvvid] Fix extraction for kenc videos (#13406)
+ [br] Add support for BR Mediathek videos (#14560, #14788) + [br] Add support for BR Mediathek videos (#14560, #14788)
+ [daisuki] Add support for motto.daisuki.com (#14681) + [daisuki] Add support for motto.daisuki.com (#14681)
@ -2504,7 +2619,7 @@ Extractors
* [nexx] Extract more formats * [nexx] Extract more formats
+ [openload] Add support for openload.link (#14763) + [openload] Add support for openload.link (#14763)
* [empflix] Relax URL regular expression * [empflix] Relax URL regular expression
* [empflix] Fix extractrion * [empflix] Fix extraction
* [tnaflix] Don't modify download URLs (#14811) * [tnaflix] Don't modify download URLs (#14811)
- [gamersyde] Remove extractor - [gamersyde] Remove extractor
* [francetv:generationwhat] Fix extraction * [francetv:generationwhat] Fix extraction
@ -2699,7 +2814,7 @@ Extractors
* [yahoo] Bypass geo restriction for brightcove (#14210) * [yahoo] Bypass geo restriction for brightcove (#14210)
* [yahoo] Use extracted brightcove account id (#14210) * [yahoo] Use extracted brightcove account id (#14210)
* [rtve:alacarta] Fix extraction (#14290) * [rtve:alacarta] Fix extraction (#14290)
+ [yahoo] Add support for custom brigthcove embeds (#14210) + [yahoo] Add support for custom brightcove embeds (#14210)
+ [generic] Add support for Video.js embeds + [generic] Add support for Video.js embeds
+ [gfycat] Add support for /gifs/detail URLs (#14322) + [gfycat] Add support for /gifs/detail URLs (#14322)
* [generic] Fix infinite recursion for twitter:player URLs (#14339) * [generic] Fix infinite recursion for twitter:player URLs (#14339)
@ -2944,7 +3059,7 @@ Extractors
* [amcnetworks] Make rating optional (#12453) * [amcnetworks] Make rating optional (#12453)
* [cloudy] Fix extraction (#13737) * [cloudy] Fix extraction (#13737)
+ [nickru] Add support for nickelodeon.ru + [nickru] Add support for nickelodeon.ru
* [mtv] Improve thumbnal extraction * [mtv] Improve thumbnail extraction
* [nick] Automate geo-restriction bypass (#13711) * [nick] Automate geo-restriction bypass (#13711)
* [niconico] Improve error reporting (#13696) * [niconico] Improve error reporting (#13696)
@ -3308,7 +3423,7 @@ Extractors
+ [cda] Support birthday verification (#12789) + [cda] Support birthday verification (#12789)
* [leeco] Fix extraction (#12974) * [leeco] Fix extraction (#12974)
+ [pbs] Extract chapters + [pbs] Extract chapters
* [amp] Imporove thumbnail and subtitles extraction * [amp] Improve thumbnail and subtitles extraction
* [foxsports] Fix extraction (#12945) * [foxsports] Fix extraction (#12945)
- [coub] Remove comment count extraction (#12941) - [coub] Remove comment count extraction (#12941)
@ -3478,7 +3593,7 @@ Extractors
+ [rbmaradio] Add support for redbullradio.com URLs (#12687) + [rbmaradio] Add support for redbullradio.com URLs (#12687)
+ [npo:live] Add support for default URL (#12555) + [npo:live] Add support for default URL (#12555)
* [mixcloud:playlist] Fix title, description and view count extraction (#12582) * [mixcloud:playlist] Fix title, description and view count extraction (#12582)
+ [thesun] Add suport for thesun.co.uk (#11298, #12674) + [thesun] Add support for thesun.co.uk (#11298, #12674)
+ [ceskateleveize:porady] Add support for porady (#7411, #12645) + [ceskateleveize:porady] Add support for porady (#7411, #12645)
* [ceskateleveize] Improve extraction and remove URL replacement hacks * [ceskateleveize] Improve extraction and remove URL replacement hacks
+ [kaltura] Add support for iframe embeds (#12679) + [kaltura] Add support for iframe embeds (#12679)
@ -3517,7 +3632,7 @@ Extractors
* [funimation] Fix extraction (#10696, #11773) * [funimation] Fix extraction (#10696, #11773)
+ [xfileshare] Add support for vidabc.com (#12589) + [xfileshare] Add support for vidabc.com (#12589)
+ [xfileshare] Improve extraction and extract hls formats + [xfileshare] Improve extraction and extract hls formats
+ [crunchyroll] Pass geo verifcation proxy + [crunchyroll] Pass geo verification proxy
+ [cwtv] Extract ISM formats + [cwtv] Extract ISM formats
+ [tvplay] Bypass geo restriction + [tvplay] Bypass geo restriction
+ [vrv] Add support for vrv.co + [vrv] Add support for vrv.co
@ -3581,7 +3696,7 @@ Extractors
+ [bostonglobe] Add extractor for bostonglobe.com (#12099) + [bostonglobe] Add extractor for bostonglobe.com (#12099)
+ [toongoggles] Add support for toongoggles.com (#12171) + [toongoggles] Add support for toongoggles.com (#12171)
+ [medialaan] Add support for Medialaan sites (#9974, #11912) + [medialaan] Add support for Medialaan sites (#9974, #11912)
+ [discoverynetworks] Add support for more domains and bypass geo restiction + [discoverynetworks] Add support for more domains and bypass geo restriction
* [openload] Fix extraction (#10408) * [openload] Fix extraction (#10408)
@ -5171,7 +5286,7 @@ version 2016.07.09.1
Fixed/improved extractors Fixed/improved extractors
- youtube - youtube
- ard - ard
- srmediatek (#9373) - srmediathek (#9373)
version 2016.07.09 version 2016.07.09
@ -5235,7 +5350,7 @@ Fixed/improved extractors
- kaltura (#5557) - kaltura (#5557)
- la7 - la7
- Changed features - Changed features
- Rename --cn-verfication-proxy to --geo-verification-proxy - Rename --cn-verification-proxy to --geo-verification-proxy
Miscellaneous Miscellaneous
- Add script for displaying downloads statistics - Add script for displaying downloads statistics

View File

@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
return s return s
# find the correct sorting and add the required base classes so that sublcasses # find the correct sorting and add the required base classes so that subclasses
# can be correctly created # can be correctly created
classes = _ALL_CLASSES[:-1] classes = _ALL_CLASSES[:-1]
ordered_cls = [] ordered_cls = []

View File

@ -41,6 +41,7 @@
- **AlJazeera** - **AlJazeera**
- **Allocine** - **Allocine**
- **AlphaPorno** - **AlphaPorno**
- **Amara**
- **AMCNetworks** - **AMCNetworks**
- **AmericasTestKitchen** - **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
@ -58,9 +59,9 @@
- **ARD:mediathek** - **ARD:mediathek**
- **ARDBetaMediathek** - **ARDBetaMediathek**
- **Arkena** - **Arkena**
- **arte.tv:+7** - **ArteTV**
- **arte.tv:embed** - **ArteTVEmbed**
- **arte.tv:playlist** - **ArteTVPlaylist**
- **AsianCrush** - **AsianCrush**
- **AsianCrushPlaylist** - **AsianCrushPlaylist**
- **AtresPlayer** - **AtresPlayer**
@ -417,6 +418,7 @@
- **la7.it** - **la7.it**
- **laola1tv** - **laola1tv**
- **laola1tv:embed** - **laola1tv:embed**
- **lbry.tv**
- **LCI** - **LCI**
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
@ -823,8 +825,6 @@
- **SpankBangPlaylist** - **SpankBangPlaylist**
- **Spankwire** - **Spankwire**
- **Spiegel** - **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr** - **sport.francetvinfo.fr**
- **Sport5** - **Sport5**
- **SportBox** - **SportBox**
@ -1042,7 +1042,6 @@
- **vk:wallpost** - **vk:wallpost**
- **vlive** - **vlive**
- **vlive:channel** - **vlive:channel**
- **vlive:playlist**
- **Vodlocker** - **Vodlocker**
- **VODPl** - **VODPl**
- **VODPlatform** - **VODPlatform**
@ -1131,20 +1130,15 @@
- **YourPorn** - **YourPorn**
- **YourUpload** - **YourUpload**
- **youtube**: YouTube.com - **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
- **youtube:history**: YouTube watch history, ":ythistory" for short (requires authentication) - **youtube:history**: YouTube watch history, ":ythistory" for short (requires authentication)
- **youtube:live**: YouTube.com live streams
- **youtube:playlist**: YouTube.com playlists - **youtube:playlist**: YouTube.com playlists
- **youtube:playlists**: YouTube.com user/channel playlists
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- **youtube:search**: YouTube.com searches - **youtube:search**: YouTube.com searches
- **youtube:search:date**: YouTube.com searches, newest videos first - **youtube:search:date**: YouTube.com searches, newest videos first
- **youtube:search_url**: YouTube.com search URLs
- **youtube:show**: YouTube.com (multi-season) shows
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) - **youtube:tab**: YouTube.com tab
- **youtube:watchlater**: YouTube watch later list, ":ytwatchlater" for short (requires authentication) - **youtube:watchlater**: YouTube watch later list, ":ytwatchlater" for short (requires authentication)
- **YoutubeYtUser**
- **Zapiks** - **Zapiks**
- **Zaq1** - **Zaq1**
- **Zattoo** - **Zattoo**

View File

@ -37,7 +37,7 @@
"writeinfojson": true, "writeinfojson": true,
"writesubtitles": false, "writesubtitles": false,
"allsubtitles": false, "allsubtitles": false,
"listssubtitles": false, "listsubtitles": false,
"socket_timeout": 20, "socket_timeout": 20,
"fixup": "never" "fixup": "never"
} }

View File

@ -919,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'testex') self.assertEqual(downloaded['extractor'], 'testex')
self.assertEqual(downloaded['extractor_key'], 'TestEx') self.assertEqual(downloaded['extractor_key'], 'TestEx')
# Regression test for https://github.com/ytdl-org/youtube-dl/issues/27064
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
    # Scenario: a playlist extractor hands back its entries through a
    # generator of 'url_transparent' results while 'ignoreerrors' is on.
    # Of the three entries only the last one is expected to be downloaded;
    # the first two are expected to show up as None in the result.

    class _QuietYDL(YDL):
        def __init__(self, *args, **kwargs):
            super(_QuietYDL, self).__init__(*args, **kwargs)

        def trouble(self, s, tb=None):
            # Error reports are deliberately discarded in this test.
            pass

    ydl = _QuietYDL({
        'ignoreerrors': True,
        'format': 'extra',
    })

    class VideoIE(InfoExtractor):
        _VALID_URL = r'video:(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)

            # Video 0 fails during extraction itself.
            if video_id == '0':
                raise ExtractorError('foo')

            formats = [{
                'format_id': 'default',
                'url': 'url:',
            }]
            # Only video 2 carries the 'extra' format requested in the
            # options above.
            if video_id == '2':
                formats.append({
                    'format_id': 'extra',
                    'url': TEST_URL,
                })

            return {
                'id': video_id,
                'title': 'Video %s' % video_id,
                'formats': formats,
            }

    class PlaylistIE(InfoExtractor):
        _VALID_URL = r'playlist:'

        def _entries(self):
            # Kept as a generator on purpose: generator-based entries are
            # exactly what this test exercises.
            for video_id in map(compat_str, range(3)):
                yield {
                    '_type': 'url_transparent',
                    'ie_key': VideoIE.ie_key(),
                    'id': video_id,
                    'url': 'video:%s' % video_id,
                    'title': 'Video Transparent %s' % video_id,
                }

        def _real_extract(self, url):
            return self.playlist_result(self._entries())

    for ie_cls in (VideoIE, PlaylistIE):
        ydl.add_info_extractor(ie_cls(ydl))

    entries = ydl.extract_info('playlist:')['entries']

    # All three slots are present; the first two hold None.
    self.assertEqual(len(entries), 3)
    for position in (0, 1):
        self.assertTrue(entries[position] is None)

    # Exactly one download happened and it is the third playlist entry.
    self.assertEqual(len(ydl.downloaded_info_dicts), 1)
    downloaded = ydl.downloaded_info_dicts[0]
    self.assertEqual(entries[2], downloaded)

    # The title comes from the url_transparent wrapper; the remaining
    # fields are checked against the values VideoIE produces.
    expected_fields = (
        ('url', TEST_URL),
        ('title', 'Video Transparent 2'),
        ('id', '2'),
        ('extractor', 'Video'),
        ('extractor_key', 'Video'),
    )
    for field, value in expected_fields:
        self.assertEqual(downloaded[field], value)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -31,16 +31,17 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_playlist_matching(self): def test_youtube_playlist_matching(self):
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
assertPlaylist('PL63F0C78739B09958') assertPlaylist('PL63F0C78739B09958')
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
# Top tracks # Top tracks
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
def test_youtube_matching(self): def test_youtube_matching(self):
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
@ -51,26 +52,22 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
def test_youtube_channel_matching(self): def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
def test_youtube_user_matching(self): # def test_youtube_user_matching(self):
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user']) # self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
def test_youtube_feeds(self): def test_youtube_feeds(self):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
def test_youtube_show_matching(self): # def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) # self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_youtube_extract(self): def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)

View File

@ -937,6 +937,28 @@ class TestUtil(unittest.TestCase):
self.assertEqual(d['x'], 1) self.assertEqual(d['x'], 1)
self.assertEqual(d['y'], 'a') self.assertEqual(d['y'], 'a')
# Just drop ! prefix for now though this results in a wrong value
on = js_to_json('''{
a: !0,
b: !1,
c: !!0,
d: !!42.42,
e: !!![],
f: !"abc",
g: !"",
!42: 42
}''')
self.assertEqual(json.loads(on), {
'a': 0,
'b': 1,
'c': 0,
'd': 42.42,
'e': [],
'f': "abc",
'g': "",
'42': 42
})
on = js_to_json('["abc", "def",]') on = js_to_json('["abc", "def",]')
self.assertEqual(json.loads(on), ['abc', 'def']) self.assertEqual(json.loads(on), ['abc', 'def'])

View File

@ -793,21 +793,14 @@ class YoutubeDL(object):
self.report_warning('The program functionality for this site has been marked as broken, ' self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.') 'and will probably not work.')
return self.__extract_info(url, ie, download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
def wrapper(self, *args, **kwargs):
try: try:
ie_result = ie.extract(url) return func(self, *args, **kwargs)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
break
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else:
return ie_result
except GeoRestrictedError as e: except GeoRestrictedError as e:
msg = e.msg msg = e.msg
if e.countries: if e.countries:
@ -815,20 +808,33 @@ class YoutubeDL(object):
map(ISO3166Utils.short2full, e.countries)) map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg) self.report_error(msg)
break
except ExtractorError as e: # An error we somewhat expected except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback()) self.report_error(compat_str(e), e.format_traceback())
break
except MaxDownloadsReached: except MaxDownloadsReached:
raise raise
except Exception as e: except Exception as e:
if self.params.get('ignoreerrors', False): if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
break
else: else:
raise raise
return wrapper
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
if isinstance(ie_result, list):
# Backwards compatibility: old IE result format
ie_result = {
'_type': 'compat_list',
'entries': ie_result,
}
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
else: else:
self.report_error('no suitable InfoExtractor for URL %s' % url) return ie_result
def add_default_extra_info(self, ie_result, ie, url): def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, { self.add_extra_info(ie_result, {
@ -1003,9 +1009,8 @@ class YoutubeDL(object):
self.to_screen('[download] ' + reason) self.to_screen('[download] ' + reason)
continue continue
entry_result = self.process_ie_result(entry, entry_result = self.__process_iterable_entry(entry, download, extra)
download=download, # TODO: skip failed (empty) entries?
extra_info=extra)
playlist_results.append(entry_result) playlist_results.append(entry_result)
ie_result['entries'] = playlist_results ie_result['entries'] = playlist_results
self.to_screen('[download] Finished downloading playlist: %s' % playlist) self.to_screen('[download] Finished downloading playlist: %s' % playlist)
@ -1034,6 +1039,11 @@ class YoutubeDL(object):
else: else:
raise Exception('Invalid result type: %s' % result_type) raise Exception('Invalid result type: %s' % result_type)
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    # Thin wrapper around process_ie_result() for a single playlist entry;
    # it exists so that the exception-handling decorator is applied per entry.
    entry_result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return entry_result
def _build_format_filter(self, filter_spec): def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec " " Returns a function to filter the formats according to the filter_spec "

View File

@ -2345,7 +2345,7 @@ except ImportError: # Python <3.4
# HTMLParseError has been deprecated in Python 3.3 and removed in # HTMLParseError has been deprecated in Python 3.3 and removed in
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible # Python 3.5. Introducing dummy exception for Python >3.5 for compatible
# and uniform cross-version exceptiong handling # and uniform cross-version exception handling
class compat_HTMLParseError(Exception): class compat_HTMLParseError(Exception):
pass pass

View File

@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
try: try:
ctx.data = self.ydl.urlopen(request) ctx.data = self.ydl.urlopen(request)
except (compat_urllib_error.URLError, ) as err: except (compat_urllib_error.URLError, ) as err:
if isinstance(err.reason, socket.timeout): # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
reason = getattr(err, 'reason', None)
if isinstance(reason, socket.timeout):
raise RetryDownload(err) raise RetryDownload(err)
raise err raise err
# When trying to resume, Content-Range HTTP header of response has to be checked # When trying to resume, Content-Range HTTP header of response has to be checked

View File

@ -0,0 +1,103 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from ..utils import (
int_or_none,
parse_iso8601,
update_url_query,
)
class AmaraIE(InfoExtractor):
    """Extractor for amara.org video pages.

    Metadata and subtitles come from the Amara API. When the underlying
    video URL is handled by the YouTube or Vimeo extractor, extraction of
    the media itself is delegated to it via a url_transparent result.
    """
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        meta = self._download_json(
            'https://amara.org/api/videos/%s/' % video_id,
            video_id, query={'format': 'json'})

        # Title and the first media URL are mandatory; a missing one raises
        title = meta['title']
        video_url = meta['all_urls'][0]

        # Published languages only; each is offered in every subtitle format
        subtitles = {}
        for language in meta.get('languages') or []:
            subtitles_uri = language.get('subtitles_uri')
            if not subtitles_uri or not language.get('published'):
                continue
            lang_code = language.get('code') or 'en'
            subtitles.setdefault(lang_code, []).extend(
                {
                    'ext': sub_ext,
                    'url': update_url_query(subtitles_uri, {'format': sub_ext}),
                } for sub_ext in ('json', 'srt', 'vtt'))

        info = {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
            'subtitles': subtitles,
        }

        # Hand the media URL to the first hosting extractor that accepts it;
        # otherwise it is returned as a plain direct video URL.
        hosting_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if hosting_ie is not None:
            info['_type'] = 'url_transparent'
            info['ie_key'] = hosting_ie.ie_key()

        return info

View File

@ -4,23 +4,57 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
qualities, qualities,
try_get, try_get,
unified_strdate, unified_strdate,
url_or_none,
) )
# There are different sources of video in arte.tv, the extraction process
# is different for each one. The videos usually expire in 7 days, so we can't
# add tests.
class ArteTVBaseIE(InfoExtractor): class ArteTVBaseIE(InfoExtractor):
def _extract_from_json_url(self, json_url, video_id, lang, title=None): _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
info = self._download_json(json_url, video_id) _API_BASE = 'https://api.arte.tv/api/player/v1'
class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
)
/(?P<id>\d{6}-\d{3}-[AF])
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'info_dict': {
'id': '088501-000-A',
'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive',
'upload_date': '20190628',
},
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'only_matching': True,
}, {
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
info = self._download_json(
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
player_info = info['videoJsonPlayer'] player_info = info['videoJsonPlayer']
vsr = try_get(player_info, lambda x: x['VSR'], dict) vsr = try_get(player_info, lambda x: x['VSR'], dict)
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
if not upload_date_str: if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
title = (player_info.get('VTI') or title or player_info['VID']).strip() title = (player_info.get('VTI') or player_info['VID']).strip()
subtitle = player_info.get('VSU', '').strip() subtitle = player_info.get('VSU', '').strip()
if subtitle: if subtitle:
title += ' - %s' % subtitle title += ' - %s' % subtitle
info_dict = {
'id': player_info['VID'],
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
LANGS = { LANGS = {
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
formats = [] formats = []
for format_id, format_dict in vsr.items(): for format_id, format_dict in vsr.items():
f = dict(format_dict) f = dict(format_dict)
format_url = url_or_none(f.get('url'))
streamer = f.get('streamer')
if not format_url and not streamer:
continue
versionCode = f.get('versionCode') versionCode = f.get('versionCode')
l = re.escape(langcode) l = re.escape(langcode)
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
else: else:
lang_pref = -1 lang_pref = -1
media_type = f.get('mediaType')
if media_type == 'hls':
m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
for m3u8_format in m3u8_formats:
m3u8_format['language_preference'] = lang_pref
formats.extend(m3u8_formats)
continue
format = { format = {
'format_id': format_id, 'format_id': format_id,
'preference': -10 if f.get('videoFormat') == 'M3U8' else None, 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
'quality': qfunc(f.get('quality')), 'quality': qfunc(f.get('quality')),
} }
if f.get('mediaType') == 'rtmp': if media_type == 'rtmp':
format['url'] = f['streamer'] format['url'] = f['streamer']
format['play_path'] = 'mp4:' + f['url'] format['play_path'] = 'mp4:' + f['url']
format['ext'] = 'flv' format['ext'] = 'flv'
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
formats.append(format) formats.append(format)
self._check_formats(formats, video_id)
self._sort_formats(formats) self._sort_formats(formats)
info_dict['formats'] = formats return {
return info_dict 'id': player_info.get('VID') or video_id,
'title': title,
'description': player_info.get('VDE'),
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
'formats': formats,
}
class ArteTVPlus7IE(ArteTVBaseIE): class ArteTVEmbedIE(InfoExtractor):
IE_NAME = 'arte.tv:+7' _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
_TESTS = [{ _TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
'info_dict': { 'info_dict': {
'id': '088501-000-A', 'id': '100605-013-A',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mexico: Stealing Petrol to Survive', 'title': 'United we Stream November Lockdown Edition #13',
'upload_date': '20190628', 'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
}, },
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
}] }]
def _real_extract(self, url): @staticmethod
lang, video_id = re.match(self._VALID_URL, url).groups() def _extract_urls(webpage):
return self._extract_from_json_url( return [url for _, url in re.findall(
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
video_id, lang) webpage)]
class ArteTVEmbedIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:embed'
_VALID_URL = r'''(?x)
https://www\.arte\.tv
/player/v3/index\.php\?json_url=
(?P<json_url>
https?://api\.arte\.tv/api/player/v1/config/
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
)
'''
_TESTS = []
def _real_extract(self, url): def _real_extract(self, url):
json_url, lang, video_id = re.match(self._VALID_URL, url).groups() qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
return self._extract_from_json_url(json_url, video_id, lang) json_url = qs['json_url'][0]
video_id = ArteTVIE._match_id(json_url)
return self.url_result(
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
class ArteTVPlaylistIE(ArteTVBaseIE): class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist' _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
_TESTS = [{ _TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'info_dict': { 'info_dict': {
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
'description': 'md5:d322c55011514b3a7241f7fb80d494c2', 'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
}, },
'playlist_mincount': 6, 'playlist_mincount': 6,
}, {
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
lang, playlist_id = re.match(self._VALID_URL, url).groups() lang, playlist_id = re.match(self._VALID_URL, url).groups()
collection = self._download_json( collection = self._download_json(
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' '%s/collectionData/%s/%s?source=videos'
% (lang, playlist_id), playlist_id) % (self._API_BASE, lang, playlist_id), playlist_id)
entries = []
for video in collection['videos']:
if not isinstance(video, dict):
continue
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
if not video_url:
continue
video_id = video.get('programId')
entries.append({
'_type': 'url_transparent',
'url': video_url,
'id': video_id,
'title': video.get('title'),
'alt_title': video.get('subtitle'),
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
'duration': int_or_none(video.get('durationSeconds')),
'view_count': int_or_none(video.get('views')),
'ie_key': ArteTVIE.ie_key(),
})
title = collection.get('title') title = collection.get('title')
description = collection.get('shortDescription') or collection.get('teaserText') description = collection.get('shortDescription') or collection.get('teaserText')
entries = [
self._extract_from_json_url(
video['jsonUrl'], video.get('programId') or playlist_id, lang)
for video in collection['videos'] if video.get('jsonUrl')]
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(entries, playlist_id, title, description)

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
@ -5,10 +6,7 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
@ -17,30 +15,32 @@ from ..utils import (
parse_filesize, parse_filesize,
str_or_none, str_or_none,
try_get, try_get,
unescapeHTML,
update_url_query, update_url_query,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urljoin,
) )
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)' _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439', 'md5': 'c557841d5e50261777a6585648adf439',
'info_dict': { 'info_dict': {
'id': '1812978515', 'id': '1812978515',
'ext': 'mp3', 'ext': 'mp3',
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", 'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
'duration': 9.8485, 'duration': 9.8485,
'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129',
'timestamp': 1354224127,
}, },
'_skip': 'There is a limit of 200 free downloads / month for the test song' '_skip': 'There is a limit of 200 free downloads / month for the test song'
}, { }, {
# free download # free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '853e35bf34aa1d6fe2615ae612564b36',
'info_dict': { 'info_dict': {
'id': '2650410135', 'id': '2650410135',
'ext': 'aiff', 'ext': 'aiff',
@ -79,11 +79,16 @@ class BandcampIE(InfoExtractor):
}, },
}] }]
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
    # Locate the quoted JSON object stored in the data-<attr> HTML attribute
    # of the page and return it parsed.
    # NOTE(review): fatal is only forwarded to _parse_json; the regex search
    # itself is called without it, so a missing attribute presumably still
    # raises even when fatal=False - confirm this is intended.
    pattern = r'data-%s=(["\'])({.+?})\1' % attr
    raw_json = self._html_search_regex(
        pattern, webpage, attr + ' data', group=2)
    return self._parse_json(raw_json, video_id, fatal=fatal)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) title = self._match_id(url)
title = mobj.group('title')
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
thumbnail = self._html_search_meta('og:image', webpage, default=None) tralbum = self._extract_data_attr(webpage, title)
thumbnail = self._og_search_thumbnail(webpage)
track_id = None track_id = None
track = None track = None
@ -91,10 +96,7 @@ class BandcampIE(InfoExtractor):
duration = None duration = None
formats = [] formats = []
track_info = self._parse_json( track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
self._search_regex(
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
webpage, 'track info', default='{}'), title)
if track_info: if track_info:
file_ = track_info.get('file') file_ = track_info.get('file')
if isinstance(file_, dict): if isinstance(file_, dict):
@ -111,37 +113,25 @@ class BandcampIE(InfoExtractor):
'abr': int_or_none(abr_str), 'abr': int_or_none(abr_str),
}) })
track = track_info.get('title') track = track_info.get('title')
track_id = str_or_none(track_info.get('track_id') or track_info.get('id')) track_id = str_or_none(
track_info.get('track_id') or track_info.get('id'))
track_number = int_or_none(track_info.get('track_num')) track_number = int_or_none(track_info.get('track_num'))
duration = float_or_none(track_info.get('duration')) duration = float_or_none(track_info.get('duration'))
def extract(key): embed = self._extract_data_attr(webpage, title, 'embed', False)
return self._search_regex( current = tralbum.get('current') or {}
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key, artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
webpage, key, default=None, group='value')
artist = extract('artist')
album = extract('album_title')
timestamp = unified_timestamp( timestamp = unified_timestamp(
extract('publish_date') or extract('album_publish_date')) current.get('publish_date') or tralbum.get('album_publish_date'))
release_date = unified_strdate(extract('album_release_date'))
download_link = self._search_regex( download_link = tralbum.get('freeDownloadPage')
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'download link', default=None, group='url')
if download_link: if download_link:
track_id = self._search_regex( track_id = compat_str(tralbum['id'])
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'track id')
download_webpage = self._download_webpage( download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page') download_link, track_id, 'Downloading free downloads page')
blob = self._parse_json( blob = self._extract_data_attr(download_webpage, track_id, 'blob')
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
track_id, transform_source=unescapeHTML)
info = try_get( info = try_get(
blob, (lambda x: x['digital_items'][0], blob, (lambda x: x['digital_items'][0],
@ -207,20 +197,20 @@ class BandcampIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': artist, 'uploader': artist,
'timestamp': timestamp, 'timestamp': timestamp,
'release_date': release_date, 'release_date': unified_strdate(tralbum.get('album_release_date')),
'duration': duration, 'duration': duration,
'track': track, 'track': track,
'track_number': track_number, 'track_number': track_number,
'track_id': track_id, 'track_id': track_id,
'artist': artist, 'artist': artist,
'album': album, 'album': embed.get('album_title'),
'formats': formats, 'formats': formats,
} }
class BandcampAlbumIE(InfoExtractor): class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album' IE_NAME = 'Bandcamp:album'
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@ -230,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1353101989', 'id': '1353101989',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Intro', 'title': 'Blazo - Intro',
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
} }
}, },
{ {
@ -238,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '38097443', 'id': '38097443',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Kero One - Keep It Alive (Blazo remix)', 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
'timestamp': 1311757238,
'upload_date': '20110727',
'uploader': 'Blazo',
} }
}, },
], ],
@ -274,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
'title': '"Entropy" EP', 'title': '"Entropy" EP',
'uploader_id': 'jstrecords', 'uploader_id': 'jstrecords',
'id': 'entropy-ep', 'id': 'entropy-ep',
'description': 'md5:0ff22959c943622972596062f2f366a5',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
}, { }, {
@ -283,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
'id': 'we-are-the-plague', 'id': 'we-are-the-plague',
'title': 'WE ARE THE PLAGUE', 'title': 'WE ARE THE PLAGUE',
'uploader_id': 'insulters', 'uploader_id': 'insulters',
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
}, },
'playlist_count': 2, 'playlist_count': 2,
}] }]
@ -294,41 +292,34 @@ class BandcampAlbumIE(InfoExtractor):
else super(BandcampAlbumIE, cls).suitable(url)) else super(BandcampAlbumIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) uploader_id, album_id = re.match(self._VALID_URL, url).groups()
uploader_id = mobj.group('subdomain')
album_id = mobj.group('album_id')
playlist_id = album_id or uploader_id playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
track_elements = re.findall( tralbum = self._extract_data_attr(webpage, playlist_id)
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage) track_info = tralbum.get('trackinfo')
if not track_elements: if not track_info:
raise ExtractorError('The page doesn\'t contain any tracks') raise ExtractorError('The page doesn\'t contain any tracks')
# Only tracks with duration info have songs # Only tracks with duration info have songs
entries = [ entries = [
self.url_result( self.url_result(
compat_urlparse.urljoin(url, t_path), urljoin(url, t['title_link']), BandcampIE.ie_key(),
ie=BandcampIE.ie_key(), str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
video_title=self._search_regex( for t in track_info
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', if t.get('duration')]
elem_content, 'track title', fatal=False))
for elem_content, t_path in track_elements current = tralbum.get('current') or {}
if self._html_search_meta('duration', elem_content, default=None)]
title = self._html_search_regex(
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
webpage, 'title', fatal=False)
if title:
title = title.replace(r'\"', '"')
return { return {
'_type': 'playlist', '_type': 'playlist',
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'id': playlist_id, 'id': playlist_id,
'title': title, 'title': current.get('title'),
'description': current.get('about'),
'entries': entries, 'entries': entries,
} }
class BandcampWeeklyIE(InfoExtractor): class BandcampWeeklyIE(BandcampIE):
IE_NAME = 'Bandcamp:weekly' IE_NAME = 'Bandcamp:weekly'
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
@ -343,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
'release_date': '20170404', 'release_date': '20170404',
'series': 'Bandcamp Weekly', 'series': 'Bandcamp Weekly',
'episode': 'Magic Moments', 'episode': 'Magic Moments',
'episode_number': 208,
'episode_id': '224', 'episode_id': '224',
} },
'params': {
'format': 'opus-lo',
},
}, { }, {
'url': 'https://bandcamp.com/?blah/blah@&show=228', 'url': 'https://bandcamp.com/?blah/blah@&show=228',
'only_matching': True 'only_matching': True
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) show_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, show_id)
blob = self._parse_json( blob = self._extract_data_attr(webpage, show_id, 'blob')
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
'blob', group='blob'),
video_id, transform_source=unescapeHTML)
show = blob['bcw_show'] show = blob['bcw_data'][show_id]
# This is desired because any invalid show id redirects to `bandcamp.com`
# which happens to expose the latest Bandcamp Weekly episode.
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
formats = [] formats = []
for format_id, format_url in show['audio_stream'].items(): for format_id, format_url in show['audio_stream'].items():
@ -390,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
if subtitle: if subtitle:
title += ' - %s' % subtitle title += ' - %s' % subtitle
episode_number = None
seq = blob.get('bcw_seq')
if seq and isinstance(seq, list):
try:
episode_number = next(
int_or_none(e.get('episode_number'))
for e in seq
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
except StopIteration:
pass
return { return {
'id': video_id, 'id': show_id,
'title': title, 'title': title,
'description': show.get('desc') or show.get('short_desc'), 'description': show.get('desc') or show.get('short_desc'),
'duration': float_or_none(show.get('audio_duration')), 'duration': float_or_none(show.get('audio_duration')),
@ -411,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
'release_date': unified_strdate(show.get('published_date')), 'release_date': unified_strdate(show.get('published_date')),
'series': 'Bandcamp Weekly', 'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'), 'episode': show.get('subtitle'),
'episode_number': episode_number, 'episode_id': show_id,
'episode_id': compat_str(video_id),
'formats': formats 'formats': formats
} }

View File

@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
parse_iso8601,
# try_get,
update_url_query,
)
class BoxIE(InfoExtractor):
    """Extractor for single files reachable through a Box shared link.

    Matches URLs of the form
    ``https://<optional-subdomain>.app.box.com/s/<shared_name>/file/<id>``.
    """

    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
    _TEST = {
        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
        'info_dict': {
            'id': '510727257538',
            'ext': 'mp4',
            'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
            'uploader': 'MLS Video',
            'timestamp': 1566320259,
            'upload_date': '20190820',
            'uploader_id': '235196876',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the file addressed by *url*.

        Steps, in order:
          1. download the share page and read ``requestToken`` out of the
             inline ``Box.config`` JSON object;
          2. POST the file id to the tokens endpoint to obtain the ``read``
             access token for that file;
          3. fetch the file metadata from ``api.box.com`` using that token;
          4. expose the authenticated download URL as the only format when
             the metadata reports the download as available.

        A missing ``requestToken``, token entry or ``name`` field raises
        ``KeyError`` since those are indexed directly.
        """
        mobj = re.match(self._VALID_URL, url)
        shared_name = mobj.group('shared_name')
        file_id = mobj.group('id')

        webpage = self._download_webpage(url, file_id)
        box_config = self._parse_json(
            self._search_regex(
                r'Box\.config\s*=\s*({.+?});', webpage, 'Box config'),
            file_id)
        request_token = box_config['requestToken']

        # The response is indexed by file id, then by token kind.
        token_headers = {
            'Content-Type': 'application/json',
            'X-Request-Token': request_token,
            'X-Box-EndUser-API': 'sharedName=' + shared_name,
        }
        tokens = self._download_json(
            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
            'Downloading token JSON metadata',
            data=json.dumps({'fileIDs': [file_id]}).encode(),
            headers=token_headers)
        access_token = tokens[file_id]['read']

        shared_link = 'https://app.box.com/s/' + shared_name
        metadata_headers = {
            'Authorization': 'Bearer ' + access_token,
            'BoxApi': 'shared_link=' + shared_link,
            'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
        }
        file_info = self._download_json(
            'https://api.box.com/2.0/files/' + file_id, file_id,
            'Downloading file JSON metadata',
            headers=metadata_headers,
            query={
                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
            })
        title = file_info['name']

        # Appended to the download URL so the request is authorized.
        auth_query = {
            'access_token': access_token,
            'shared_link': shared_link
        }

        formats = []

        # DASH representations are not extracted yet; kept for reference:
        # for entry in (try_get(file_info, lambda x: x['representations']['entries'], list) or []):
        #     entry_url_template = try_get(
        #         entry, lambda x: x['content']['url_template'])
        #     if not entry_url_template:
        #         continue
        #     representation = entry.get('representation')
        #     if representation == 'dash':
        #         TODO: append query to every fragment URL
        #         formats.extend(self._extract_mpd_formats(
        #             entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
        #             file_id, query=auth_query))

        download_url = file_info.get('authenticated_download_url')
        if download_url and file_info.get('is_download_available'):
            # Fall back to the extension in the file name when the API
            # does not report one.
            formats.append({
                'ext': file_info.get('extension') or determine_ext(title),
                'filesize': file_info.get('size'),
                'format_id': 'download',
                'url': update_url_query(download_url, auth_query),
            })

        self._sort_formats(formats)

        # `created_by` may be absent or null; treat both as "no creator".
        uploader = file_info.get('created_by') or {}

        return {
            'id': file_id,
            'title': title,
            'formats': formats,
            # Normalize an empty description to None.
            'description': file_info.get('description') or None,
            'uploader': uploader.get('name'),
            'timestamp': parse_iso8601(file_info.get('created_at')),
            'uploader_id': uploader.get('id'),
        }

View File

@ -147,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
] ]
@classmethod @classmethod
def _build_brighcove_url(cls, object_str): def _build_brightcove_url(cls, object_str):
""" """
Build a Brightcove url from a xml string containing Build a Brightcove url from a xml string containing
<object class="BrightcoveExperience">{params}</object> <object class="BrightcoveExperience">{params}</object>
@ -217,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return cls._make_brightcove_url(params) return cls._make_brightcove_url(params)
@classmethod @classmethod
def _build_brighcove_url_from_js(cls, object_js): def _build_brightcove_url_from_js(cls, object_js):
# The layout of JS is as follows: # The layout of JS is as follows:
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
# // build Brightcove <object /> XML # // build Brightcove <object /> XML
@ -272,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
).+?>\s*</object>''', ).+?>\s*</object>''',
webpage) webpage)
if matches: if matches:
return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
if matches: if matches:
return list(filter(None, [ return list(filter(None, [
cls._build_brighcove_url_from_js(custom_bc) cls._build_brightcove_url_from_js(custom_bc)
for custom_bc in matches])) for custom_bc in matches]))
return [src for _, src in re.findall( return [src for _, src in re.findall(
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import smuggle_url
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
class CNBCVideoIE(InfoExtractor): class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)' _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
_TEST = { _TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': { 'info_dict': {
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) video_id = self._download_json(
video_id = self._search_regex( 'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, 'query': '''{
'video id') page(path: "%s") {
vcpsId
}
}''' % path,
})['data']['page']['vcpsId']
return self.url_result( return self.url_result(
'http://video.cnbc.com/gallery/?video=%s' % video_id, 'http://video.cnbc.com/gallery/?video=%d' % video_id,
CNBCIE.ie_key()) CNBCIE.ie_key())

View File

@ -1456,9 +1456,10 @@ class InfoExtractor(object):
try: try:
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True return True
except ExtractorError: except ExtractorError as e:
self.to_screen( self.to_screen(
'%s: %s URL is invalid, skipping' % (video_id, item)) '%s: %s URL is invalid, skipping: %s'
% (video_id, item, error_to_compat_str(e.cause)))
return False return False
def http_scheme(self): def http_scheme(self):
@ -1663,7 +1664,7 @@ class InfoExtractor(object):
# just the media without qualities renditions. # just the media without qualities renditions.
# Fortunately, master playlist can be easily distinguished from media # Fortunately, master playlist can be easily distinguished from media
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4] # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
# master playlist tags MUST NOT appear in a media playist and vice versa. # master playlist tags MUST NOT appear in a media playlist and vice versa.
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
# media playlist and MUST NOT appear in master playlist thus we can # media playlist and MUST NOT appear in master playlist thus we can
# clearly detect media playlist with this criterion. # clearly detect media playlist with this criterion.
@ -2595,6 +2596,7 @@ class InfoExtractor(object):
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}): def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = [] formats = []
hdcore_sign = 'hdcore=3.7.0' hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m') f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hds_host = hosts.get('hds') hds_host = hosts.get('hds')
@ -2607,6 +2609,7 @@ class InfoExtractor(object):
for entry in f4m_formats: for entry in f4m_formats:
entry.update({'extra_param_to_segment_url': hdcore_sign}) entry.update({'extra_param_to_segment_url': hdcore_sign})
formats.extend(f4m_formats) formats.extend(f4m_formats)
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8') m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
hls_host = hosts.get('hls') hls_host = hosts.get('hls')
if hls_host: if hls_host:
@ -2614,6 +2617,31 @@ class InfoExtractor(object):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
http_host = hosts.get('http')
if http_host and 'hdnea=' not in manifest_url:
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
qualities_length = len(qualities)
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
i = 0
http_formats = []
for f in formats:
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
for protocol in ('http', 'https'):
http_f = f.copy()
del http_f['manifest_url']
http_url = re.sub(
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url,
'protocol': protocol,
})
http_formats.append(http_f)
i += 1
formats.extend(http_formats)
return formats return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]): def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):

View File

@ -16,6 +16,8 @@ from ..utils import (
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
parse_iso8601, parse_iso8601,
strip_or_none,
try_get,
) )
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
'uploader': 'gq', 'uploader': 'gq',
'upload_date': '20170321', 'upload_date': '20170321',
'timestamp': 1490126427, 'timestamp': 1490126427,
'description': 'How much grimmer would things be if these people were competent?',
}, },
}, { }, {
# JS embed # JS embed
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
'title': '3D printed TSA Travel Sentry keys really do open TSA locks', 'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
'uploader': 'arstechnica', 'uploader': 'arstechnica',
'upload_date': '20150916', 'upload_date': '20150916',
'timestamp': 1442434955, 'timestamp': 1442434920,
} }
}, { }, {
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
}) })
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {}
for t, caption in video_info.get('captions', {}).items():
caption_url = caption.get('src')
if not (t in ('vtt', 'srt', 'tml') and caption_url):
continue
subtitles.setdefault('en', []).append({'url': caption_url})
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
'season': video_info.get('season_title'), 'season': video_info.get('season_title'),
'timestamp': parse_iso8601(video_info.get('premiere_date')), 'timestamp': parse_iso8601(video_info.get('premiere_date')),
'categories': video_info.get('categories'), 'categories': video_info.get('categories'),
'subtitles': subtitles,
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
if url_type == 'series': if url_type == 'series':
return self._extract_series(url, webpage) return self._extract_series(url, webpage)
else: else:
params = self._extract_video_params(webpage, display_id) video = try_get(self._parse_json(self._search_regex(
info = self._search_json_ld( r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
webpage, display_id, fatal=False) 'preload state', '{}'), display_id),
lambda x: x['transformed']['video'])
if video:
params = {'videoId': video['id']}
info = {'description': strip_or_none(video.get('description'))}
else:
params = self._extract_video_params(webpage, display_id)
info = self._search_json_ld(
webpage, display_id, fatal=False)
info.update(self._extract_video(params)) info.update(self._extract_video(params))
return info return info

View File

@ -7,7 +7,7 @@ from .dplay import DPlayIE
class DiscoveryNetworksDeIE(DPlayIE): class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
}, { }, {
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
title = get_item('title', preferred_langs) or video_id title = get_item('title', preferred_langs) or video_id
description = get_item('description', preferred_langs) description = get_item('description', preferred_langs)
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail') thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date')) upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration')) duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
view_count = int_or_none(xpath_text(playlist, './info/views', 'views')) view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnmail, 'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'duration': duration, 'duration': duration,
'view_count': view_count, 'view_count': view_count,

View File

@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .amcnetworks import AMCNetworksIE from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE from .animeondemand import AnimeOnDemandIE
@ -58,7 +59,7 @@ from .ard import (
ARDMediathekIE, ARDMediathekIE,
) )
from .arte import ( from .arte import (
ArteTVPlus7IE, ArteTVIE,
ArteTVEmbedIE, ArteTVEmbedIE,
ArteTVPlaylistIE, ArteTVPlaylistIE,
) )
@ -121,6 +122,7 @@ from .blinkx import BlinkxIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bostonglobe import BostonGlobeIE from .bostonglobe import BostonGlobeIE
from .box import BoxIE
from .bpb import BpbIE from .bpb import BpbIE
from .br import ( from .br import (
BRIE, BRIE,
@ -529,6 +531,7 @@ from .laola1tv import (
EHFTVIE, EHFTVIE,
ITTFIE, ITTFIE,
) )
from .lbry import LBRYIE
from .lci import LCIIE from .lci import LCIIE
from .lcp import ( from .lcp import (
LcpPlayIE, LcpPlayIE,
@ -780,6 +783,7 @@ from .ntvru import NTVRuIE
from .nytimes import ( from .nytimes import (
NYTimesIE, NYTimesIE,
NYTimesArticleIE, NYTimesArticleIE,
NYTimesCookingIE,
) )
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .nzz import NZZIE from .nzz import NZZIE
@ -842,6 +846,10 @@ from .picarto import (
) )
from .piksel import PikselIE from .piksel import PikselIE
from .pinkbike import PinkbikeIE from .pinkbike import PinkbikeIE
from .pinterest import (
PinterestIE,
PinterestCollectionIE,
)
from .pladform import PladformIE from .pladform import PladformIE
from .platzi import ( from .platzi import (
PlatziIE, PlatziIE,
@ -957,6 +965,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
from .rtvnh import RTVNHIE from .rtvnh import RTVNHIE
from .rtvs import RTVSIE from .rtvs import RTVSIE
from .ruhd import RUHDIE from .ruhd import RUHDIE
from .rumble import RumbleEmbedIE
from .rutube import ( from .rutube import (
RutubeIE, RutubeIE,
RutubeChannelIE, RutubeChannelIE,
@ -1007,6 +1016,16 @@ from .shared import (
from .showroomlive import ShowRoomLiveIE from .showroomlive import ShowRoomLiveIE
from .sina import SinaIE from .sina import SinaIE
from .sixplay import SixPlayIE from .sixplay import SixPlayIE
from .skyit import (
SkyItPlayerIE,
SkyItVideoIE,
SkyItVideoLiveIE,
SkyItIE,
SkyItAcademyIE,
SkyItArteIE,
CieloTVItIE,
TV8ItIE,
)
from .skylinewebcams import SkylineWebcamsIE from .skylinewebcams import SkylineWebcamsIE
from .skynewsarabia import ( from .skynewsarabia import (
SkyNewsArabiaIE, SkyNewsArabiaIE,
@ -1053,8 +1072,7 @@ from .spankbang import (
SpankBangPlaylistIE, SpankBangPlaylistIE,
) )
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegel import SpiegelIE
from .spiegeltv import SpiegeltvIE
from .spike import ( from .spike import (
BellatorIE, BellatorIE,
ParamountNetworkIE, ParamountNetworkIE,
@ -1357,7 +1375,6 @@ from .vk import (
from .vlive import ( from .vlive import (
VLiveIE, VLiveIE,
VLiveChannelIE, VLiveChannelIE,
VLivePlaylistIE
) )
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vodpl import VODPlIE from .vodpl import VODPlIE
@ -1474,21 +1491,17 @@ from .yourporn import YourPornIE
from .yourupload import YourUploadIE from .yourupload import YourUploadIE
from .youtube import ( from .youtube import (
YoutubeIE, YoutubeIE,
YoutubeChannelIE,
YoutubeFavouritesIE,
YoutubeHistoryIE, YoutubeHistoryIE,
YoutubeLiveIE, YoutubeTabIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubePlaylistsIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,
YoutubeSearchDateIE, YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
YoutubeSearchURLIE, #YoutubeSearchURLIE,
YoutubeShowIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeUserIE, YoutubeYtUserIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zapiks import ZapiksIE from .zapiks import ZapiksIE

View File

@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse', 'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
'description': 'md5:401969c5d318c061f86bda1fa359292b', 'description': 'md5:401969c5d318c061f86bda1fa359292b',
'thumbnail': r're:^https?://.*\.jpg',
'upload_date': '20160907', 'upload_date': '20160907',
}, },
} }
@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
upload_date_str = self._search_regex( upload_date_str = self._search_regex(
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date, 'upload_date': upload_date,
'formats': [{ 'formats': [{
'url': video_url, 'url': video_url,

View File

@ -17,6 +17,7 @@ from ..utils import (
parse_duration, parse_duration,
try_get, try_get,
url_or_none, url_or_none,
urljoin,
) )
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
is_live = None is_live = None
formats = [] videos = []
for video in info['videos']:
if video['statut'] != 'ONLINE': for video in (info.get('videos') or []):
if video.get('statut') != 'ONLINE':
continue continue
video_url = video['url'] if not video.get('url'):
continue
videos.append(video)
if not videos:
for device_type in ['desktop', 'mobile']:
fallback_info = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
video_id, 'Downloading fallback %s video JSON' % device_type, query={
'device_type': device_type,
'browser': 'chrome',
}, fatal=False)
if fallback_info and fallback_info.get('video'):
videos.append(fallback_info['video'])
formats = []
for video in videos:
video_url = video.get('url')
if not video_url: if not video_url:
continue continue
if is_live is None: if is_live is None:
is_live = (try_get( is_live = (try_get(
video, lambda x: x['plages_ouverture'][0]['direct'], video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
bool) is True) or '/live.francetv.fr/' in video_url or video.get('is_live') is True
format_id = video['format'] or '/live.francetv.fr/' in video_url)
format_id = video.get('format')
ext = determine_ext(video_url) ext = determine_ext(video_url)
if ext == 'f4m': if ext == 'f4m':
if georestricted: if georestricted:
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
sign(video_url, format_id), video_id, 'mp4', sign(video_url, format_id), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id, entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False)) fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
elif video_url.startswith('rtmp'): elif video_url.startswith('rtmp'):
formats.append({ formats.append({
'url': video_url, 'url': video_url,
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
'url': video_url, 'url': video_url,
'format_id': format_id, 'format_id': format_id,
}) })
self._sort_formats(formats) self._sort_formats(formats)
title = info['titre'] title = info['titre']
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': self._live_title(title) if is_live else title, 'title': self._live_title(title) if is_live else title,
'description': clean_html(info['synopsis']), 'description': clean_html(info.get('synopsis')),
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']), 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
'timestamp': int_or_none(info['diffusion']['timestamp']), 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
'is_live': is_live, 'is_live': is_live,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,

View File

@ -91,6 +91,7 @@ from .piksel import PikselIE
from .videa import VideaIE from .videa import VideaIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE from .ustream import UstreamIE
from .arte import ArteTVEmbedIE
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .rutube import RutubeIE from .rutube import RutubeIE
from .limelight import LimelightBaseIE from .limelight import LimelightBaseIE
@ -841,7 +842,7 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
} }
}, },
# MTVSercices embed # MTVServices embed
{ {
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html', 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
'md5': 'ca1aef97695ef2c1d6973256a57e5252', 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
@ -2760,11 +2761,9 @@ class GenericIE(InfoExtractor):
return self.url_result(ustream_url, UstreamIE.ie_key()) return self.url_result(ustream_url, UstreamIE.ie_key())
# Look for embedded arte.tv player # Look for embedded arte.tv player
mobj = re.search( arte_urls = ArteTVEmbedIE._extract_urls(webpage)
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"', if arte_urls:
webpage) return self.playlist_from_matches(arte_urls, video_id, video_title)
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
# Look for embedded francetv player # Look for embedded francetv player
mobj = re.search( mobj = re.search(

View File

@ -3,11 +3,13 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
try_get,
update_url_query, update_url_query,
) )
@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
# video can't be watched anonymously due to view count limit reached, # video can't be watched anonymously due to view count limit reached,
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046) # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view', 'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e', 'only_matching': True,
'info_dict': {
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
'ext': 'mp4',
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
}
}, { }, {
# video id is longer than 28 characters # video id is longer than 28 characters
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit', 'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
'info_dict': {
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
'ext': 'mp4',
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
'duration': 189,
},
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28', 'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( video_info = compat_parse_qs(self._download_webpage(
'http://docs.google.com/file/d/%s' % video_id, video_id) 'https://drive.google.com/get_video_info',
video_id, query={'docid': video_id}))
title = self._search_regex( def get_value(key):
r'"title"\s*,\s*"([^"]+)', webpage, 'title', return try_get(video_info, lambda x: x[key][0])
default=None) or self._og_search_title(webpage)
duration = int_or_none(self._search_regex( reason = get_value('reason')
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', title = get_value('title')
default=None)) if not title and reason:
raise ExtractorError(reason, expected=True)
formats = [] formats = []
fmt_stream_map = self._search_regex( fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, fmt_list = (get_value('fmt_list') or '').split(',')
'fmt stream map', default='').split(',')
fmt_list = self._search_regex(
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
'fmt_list', default='').split(',')
if fmt_stream_map and fmt_list: if fmt_stream_map and fmt_list:
resolutions = {} resolutions = {}
for fmt in fmt_list: for fmt in fmt_list:
@ -257,19 +246,14 @@ class GoogleDriveIE(InfoExtractor):
if urlh and urlh.headers.get('Content-Disposition'): if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh) add_source_format(urlh)
if not formats: if not formats and reason:
reason = self._search_regex( raise ExtractorError(reason, expected=True)
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
if reason:
raise ExtractorError(reason, expected=True)
self._sort_formats(formats) self._sort_formats(formats)
hl = self._search_regex( hl = get_value('hl')
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
subtitles_id = None subtitles_id = None
ttsurl = self._search_regex( ttsurl = get_value('ttsurl')
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
if ttsurl: if ttsurl:
# the video Id for subtitles will be the last value in the ttsurl # the video Id for subtitles will be the last value in the ttsurl
# query string # query string
@ -279,8 +263,8 @@ class GoogleDriveIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': self._og_search_thumbnail(webpage, default=None), 'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
'duration': duration, 'duration': int_or_none(get_value('length_seconds')),
'formats': formats, 'formats': formats,
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl), 'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
'automatic_captions': self.extract_automatic_captions( 'automatic_captions': self.extract_automatic_captions(

View File

@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
def _extract_rtmp_video(self, webpage): def _extract_rtmp_video(self, webpage):
# The server URL is hardcoded # The server URL is hardcoded
video_url = 'rtmpe://video.infoq.com/cfx/st/' video_url = 'rtmpe://videof.infoq.com/cfx/st/'
# Extract video URL # Extract video URL
encoded_id = self._search_regex( encoded_id = self._search_regex(
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
return [{ return [{
'format_id': 'http_video', 'format_id': 'http_video',
'url': http_video_url, 'url': http_video_url,
'http_headers': {'Referer': 'https://www.infoq.com/'},
}] }]
def _extract_http_audio(self, webpage, video_id): def _extract_http_audio(self, webpage, video_id):
fields = self._hidden_inputs(webpage) fields = self._form_hidden_inputs('mp3Form', webpage)
http_audio_url = fields.get('filename') http_audio_url = fields.get('filename')
if not http_audio_url: if not http_audio_url:
return [] return []
# base URL is found in the Location header in the response returned by # base URL is found in the Location header in the response returned by
# GET https://www.infoq.com/mp3download.action?filename=... when logged in. # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url) http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage)) http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
# audio file seem to be missing some times even if there is a download link # audio file seem to be missing some times even if there is a download link

View File

@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000) duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
description = xpath_text(doc, 'ABSTRACT') description = xpath_text(doc, 'ABSTRACT')
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME') thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate')) creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content') quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
formats = [] formats = []
@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
'duration': duration, 'duration': duration,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': createtion_time, 'timestamp': creation_time,
} }

View File

@ -0,0 +1,91 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
mimetype2ext,
try_get,
)
class LBRYIE(InfoExtractor):
    """Extractor for single video/audio claims on lbry.tv and odysee.com."""

    IE_NAME = 'lbry.tv'
    # The captured id is "@channel:claim/stream:claim". Both claim ids must be
    # matched in full; matching a single character for the stream claim id
    # would truncate longer ids and hand a different (shorter) id to the
    # resolve call.
    _VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z]+)'
    _TESTS = [{
        # Video
        'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
        'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
        'info_dict': {
            'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
            'ext': 'mp4',
            'title': 'First day in LBRY? Start HERE!',
            'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
            'timestamp': 1595694354,
            'upload_date': '20200725',
        }
    }, {
        # Audio
        'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
        'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
        'info_dict': {
            'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
            'ext': 'mp3',
            'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
            'description': 'md5:661ac4f1db09f31728931d7b88807a61',
            'timestamp': 1591312601,
            'upload_date': '20200604',
        }
    }, {
        'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
        'only_matching': True,
    }, {
        'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
        'only_matching': True,
    }]

    def _call_api_proxy(self, method, display_id, params):
        """POST a JSON-RPC style request to the lbry.tv proxy.

        The body is ``{'method': method, 'params': params}`` serialized as
        JSON; the ``'result'`` member of the decoded response is returned.
        A response without ``'result'`` raises KeyError.
        """
        return self._download_json(
            'https://api.lbry.tv/api/v1/proxy', display_id,
            headers={'Content-Type': 'application/json-rpc'},
            data=json.dumps({
                'method': method,
                'params': params,
            }).encode())['result']

    def _real_extract(self, url):
        """Resolve the claim addressed by *url* and build its info dict.

        Raises ExtractorError (expected) when the resolved claim's
        ``stream_type`` is neither ``'video'`` nor ``'audio'``.
        """
        # The lbry:// URI uses '#' where the web URL uses ':'.
        display_id = self._match_id(url).replace(':', '#')
        uri = 'lbry://' + display_id

        # 'resolve' returns a mapping keyed by the requested URI.
        result = self._call_api_proxy(
            'resolve', display_id, {'urls': [uri]})[uri]
        result_value = result['value']
        if result_value.get('stream_type') not in ('video', 'audio'):
            raise ExtractorError('Unsupported URL', expected=True)

        # Only ask for the stream once the claim is known to be playable.
        streaming_url = self._call_api_proxy(
            'get', display_id, {'uri': uri})['streaming_url']

        # Optional sub-objects default to {} so the .get() calls below are safe.
        source = result_value.get('source') or {}
        media = result_value.get('video') or result_value.get('audio') or {}
        signing_channel = result_value.get('signing_channel') or {}

        return {
            'id': result['claim_id'],
            'title': result_value['title'],
            'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
            'description': result_value.get('description'),
            'license': result_value.get('license'),
            'timestamp': int_or_none(result.get('timestamp')),
            'tags': result_value.get('tags'),
            'width': int_or_none(media.get('width')),
            'height': int_or_none(media.get('height')),
            'duration': int_or_none(media.get('duration')),
            'channel': signing_channel.get('name'),
            'channel_id': signing_channel.get('claim_id'),
            'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
            'filesize': int_or_none(source.get('size')),
            'url': streaming_url,
        }

View File

@ -5,28 +5,26 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, clean_html,
int_or_none, merge_dicts,
parse_duration,
remove_end,
) )
class LRTIE(InfoExtractor): class LRTIE(InfoExtractor):
IE_NAME = 'lrt.lt' IE_NAME = 'lrt.lt'
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
_TESTS = [{ _TESTS = [{
# m3u8 download # m3u8 download
'url': 'http://www.lrt.lt/mediateka/irasas/54391/', 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0', 'md5': '85cb2bb530f31d91a9c65b479516ade4',
'info_dict': { 'info_dict': {
'id': '54391', 'id': '2000127261',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Septynios Kauno dienos', 'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
'description': 'md5:24d84534c7dc76581e59f5689462411a', 'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
'duration': 1783, 'duration': 3035,
'view_count': int, 'timestamp': 1604079000,
'like_count': int, 'upload_date': '20201030',
}, },
}, { }, {
# direct mp3 download # direct mp3 download
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
}, },
}] }]
def _extract_js_var(self, webpage, var_name, default):
    """Return the quoted string assigned to *var_name* in *webpage*.

    Looks for ``<var_name> = '<value>'`` (single or double quotes) and
    returns the text between the quotes; *default* is handed to
    ``_search_regex`` as its default value. *var_name* is interpolated
    into the pattern as-is.
    """
    # Group 1 is the opening quote, group 2 the value up to the same quote.
    assignment_re = r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name
    field_label = var_name.replace('_', ' ')
    return self._search_regex(
        assignment_re, webpage, field_label, default, group=2)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) path, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - LRT') media_url = self._extract_js_var(webpage, 'main_url', path)
media = self._download_json(self._extract_js_var(
webpage, 'media_info_url',
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
video_id, query={'url': media_url})
jw_data = self._parse_jwplayer_data(
media['playlist_item'], video_id, base_url=url)
formats = [] json_ld_data = self._search_json_ld(webpage, video_id)
for _, file_url in re.findall(
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): tags = []
ext = determine_ext(file_url) for tag in (media.get('tags') or []):
if ext not in ('m3u8', 'mp3'): tag_name = tag.get('name')
if not tag_name:
continue continue
# mp3 served as m3u8 produces stuttered media file tags.append(tag_name)
if ext == 'm3u8' and '.mp3' in file_url:
continue
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
fatal=False))
elif ext == 'mp3':
formats.append({
'url': file_url,
'vcodec': 'none',
})
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage) clean_info = {
description = self._og_search_description(webpage) 'description': clean_html(media.get('content')),
duration = parse_duration(self._search_regex( 'tags': tags,
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
webpage, 'duration', default=None, group='duration'))
view_count = int_or_none(self._html_search_regex(
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
webpage, 'view count', fatal=False, group='count'))
like_count = int_or_none(self._search_regex(
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
webpage, 'like count', fatal=False, group='count'))
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
'description': description,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
} }
return merge_dicts(clean_info, jw_data, json_ld_data)

View File

@ -1,10 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import merge_dicts from ..utils import (
clean_html,
dict_get,
float_or_none,
int_or_none,
merge_dicts,
parse_duration,
try_get,
)
class MallTVIE(InfoExtractor): class MallTVIE(InfoExtractor):
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice', 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
'ext': 'mp4', 'ext': 'mp4',
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?', 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb', 'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
'duration': 216, 'duration': 216,
'timestamp': 1538870400, 'timestamp': 1538870400,
'upload_date': '20181007', 'upload_date': '20181007',
@ -37,20 +43,46 @@ class MallTVIE(InfoExtractor):
webpage = self._download_webpage( webpage = self._download_webpage(
url, display_id, headers=self.geo_verification_headers()) url, display_id, headers=self.geo_verification_headers())
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b' video = self._parse_json(self._search_regex(
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
webpage, 'video object'), display_id)
video_source = video['VideoSource']
video_id = self._search_regex( video_id = self._search_regex(
SOURCE_RE, webpage, 'video id', group='id') r'/([\da-z]+)/index\b', video_source, 'video id')
media = self._parse_html5_media_entries( formats = self._extract_m3u8_formats(
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id, video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0] self._sort_formats(formats)
subtitles = {}
for s in (video.get('Subtitles') or {}):
s_url = s.get('Url')
if not s_url:
continue
subtitles.setdefault(s.get('Language') or 'cz', []).append({
'url': s_url,
})
entity_counts = video.get('EntityCounts') or {}
def get_count(k):
v = entity_counts.get(k + 's') or {}
return int_or_none(dict_get(v, ('Count', 'StrCount')))
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(media, info, { return merge_dicts({
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': self._og_search_title(webpage, default=None) or display_id, 'title': video.get('Title'),
'description': self._og_search_description(webpage, default=None), 'description': clean_html(video.get('Description')),
'thumbnail': self._og_search_thumbnail(webpage, default=None), 'thumbnail': video.get('ThumbnailUrl'),
}) 'formats': formats,
'subtitles': subtitles,
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
'view_count': get_count('View'),
'like_count': get_count('Like'),
'dislike_count': get_count('Dislike'),
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
'comment_count': get_count('Comment'),
}, info)

View File

@ -17,9 +17,8 @@ from ..utils import (
class MGTVIE(InfoExtractor): class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV' IE_DESC = '芒果TV'
_GEO_COUNTRIES = ['CN']
_TESTS = [{ _TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html', 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
}, { }, {
'url': 'http://www.mgtv.com/b/301817/3826653.html', 'url': 'http://www.mgtv.com/b/301817/3826653.html',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://w.mgtv.com/b/301817/3826653.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
try: try:
api_data = self._download_json( api_data = self._download_json(
'https://pcweb.api.mgtv.com/player/video', video_id, query={ 'https://pcweb.api.mgtv.com/player/video', video_id, query={
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1], 'tk2': tk2,
'video_id': video_id, 'video_id': video_id,
}, headers=self.geo_verification_headers())['data'] }, headers=self.geo_verification_headers())['data']
except ExtractorError as e: except ExtractorError as e:
@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
stream_data = self._download_json( stream_data = self._download_json(
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={ 'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
'pm2': api_data['atc']['pm2'], 'pm2': api_data['atc']['pm2'],
'tk2': tk2,
'video_id': video_id, 'video_id': video_id,
}, headers=self.geo_verification_headers())['data'] }, headers=self.geo_verification_headers())['data']
stream_domain = stream_data['stream_domain'][0] stream_domain = stream_data['stream_domain'][0]

View File

@ -349,6 +349,18 @@ class MTVIE(MTVServicesInfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def extract_child_with_type(parent, t):
children = parent['children']
return next(c for c in children if c.get('type') == t)
def _extract_mgid(self, webpage):
    """Return the video config URI found in the page's ``__DATA__`` object.

    The JSON object assigned to ``__DATA__`` is parsed, then the
    'MainContainer' child and its 'VideoPlayer' child are located via
    ``extract_child_with_type``; the URI is read from
    ``props.media.video.config.uri`` of that player.
    """
    raw_data = self._search_regex(
        r'__DATA__\s*=\s*({.+?});', webpage, 'data')
    page_data = self._parse_json(raw_data, None)
    container = self.extract_child_with_type(page_data, 'MainContainer')
    player = self.extract_child_with_type(container, 'VideoPlayer')
    return player['props']['media']['video']['config']['uri']
class MTVJapanIE(MTVServicesInfoExtractor): class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvjapan' IE_NAME = 'mtvjapan'

View File

@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
js_to_json,
parse_duration, parse_duration,
smuggle_url, smuggle_url,
try_get, try_get,
@ -394,8 +393,8 @@ class NBCNewsIE(ThePlatformIE):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data = self._parse_json(self._search_regex( data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.+});', webpage, r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
'bootstrap json'), video_id, js_to_json) webpage, 'bootstrap json'), video_id)['props']['initialState']
video_data = try_get(data, lambda x: x['video']['current'], dict) video_data = try_get(data, lambda x: x['video']['current'], dict)
if not video_data: if not video_data:
video_data = data['article']['content'][0]['primaryMedia']['video'] video_data = data['article']['content'][0]['primaryMedia']['video']

View File

@ -81,6 +81,29 @@ class NDRIE(NDRBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# with subtitles
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
'info_dict': {
'id': 'extra18674',
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
'ext': 'mp4',
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
'uploader': 'ndrtv',
'upload_date': '20201113',
'duration': 1749,
'subtitles': {
'de': [{
'ext': 'ttml',
'url': r're:^https://www\.ndr\.de.+',
}],
},
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}, { }, {
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html', 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
'only_matching': True, 'only_matching': True,
@ -239,6 +262,20 @@ class NDREmbedBaseIE(InfoExtractor):
'preference': quality_key(thumbnail.get('quality')), 'preference': quality_key(thumbnail.get('quality')),
}) })
subtitles = {}
tracks = config.get('tracks')
if tracks and isinstance(tracks, list):
for track in tracks:
if not isinstance(track, dict):
continue
track_url = urljoin(url, track.get('src'))
if not track_url:
continue
subtitles.setdefault(track.get('srclang') or 'de', []).append({
'url': track_url,
'ext': 'ttml',
})
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -248,6 +285,7 @@ class NDREmbedBaseIE(InfoExtractor):
'duration': duration, 'duration': duration,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
} }

View File

@ -33,7 +33,7 @@ class NprIE(InfoExtractor):
}, },
}], }],
}, { }, {
# mutlimedia, not media title # multimedia, not media title
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert', 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
'info_dict': { 'info_dict': {
'id': '533198237', 'id': '533198237',

View File

@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE):
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'), r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
webpage, 'podcast data') webpage, 'podcast data')
return self._extract_podcast_from_json(podcast_data, page_id, webpage) return self._extract_podcast_from_json(podcast_data, page_id, webpage)
class NYTimesCookingIE(NYTimesBaseIE):
    """Extractor for videos embedded in cooking.nytimes.com recipes and guides."""

    _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
        'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
        'info_dict': {
            'id': '100000004756089',
            'ext': 'mov',
            'timestamp': 1479383008,
            'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
            'title': 'Cranberry Tart',
            'upload_date': '20161117',
            'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
        },
    }, {
        'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
        'md5': '4b2e8c70530a89b8d905a2b572316eb8',
        'info_dict': {
            'id': '100000003951728',
            'ext': 'mov',
            'timestamp': 1445509539,
            'description': 'Turkey guide',
            'upload_date': '20151022',
            'title': 'Turkey',
        }
    }]

    def _real_extract(self, url):
        """Read the ``data-video-id`` attribute from the page and extract that video."""
        # The numeric id in the URL names the recipe/guide page, not the video.
        recipe_id = self._match_id(url)
        page_html = self._download_webpage(url, recipe_id)
        return self._extract_video_from_id(self._search_regex(
            r'data-video-id=["\'](\d+)', page_html, 'video id'))

View File

@ -477,7 +477,7 @@ class PBSIE(InfoExtractor):
if media_id: if media_id:
return media_id, presumptive_id, upload_date, description return media_id, presumptive_id, upload_date, description
# Fronline video embedded via flp # Frontline video embedded via flp
video_id = self._search_regex( video_id = self._search_regex(
r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None) r'videoid\s*:\s*"([\d+a-z]{7,})"', webpage, 'videoid', default=None)
if video_id: if video_id:

View File

@ -0,0 +1,176 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
try_get,
unified_timestamp,
url_or_none,
)
class PinterestBaseIE(InfoExtractor):
    """Shared helpers for the Pinterest pin and board extractors."""

    _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'

    def _extract_resource(self, webpage, video_id):
        """Return ``resourceResponses`` from the page's ``initial-state`` JSON script."""
        return self._parse_json(
            self._search_regex(
                r'<script[^>]+\bid=["\']initial-state["\'][^>]*>({.+?})</script>',
                webpage, 'application json'),
            video_id)['resourceResponses']

    def _extract_video(self, data, extract_formats=True):
        """Build an info dict from a single pin's *data* mapping.

        When *extract_formats* is false, no formats are collected and the
        returned ``formats`` list is empty.
        """
        video_id = data['id']

        title = (data.get('title') or data.get('grid_title') or video_id).strip()

        formats = []
        duration = None
        if extract_formats:
            # A pin without a video payload has no videos.video_list mapping.
            # Treat that as "no formats" instead of failing with a raw
            # TypeError/KeyError on the nested lookup.
            video_list = try_get(
                data, lambda x: x['videos']['video_list'], dict) or {}
            for format_id, format_dict in video_list.items():
                if not isinstance(format_dict, dict):
                    continue
                format_url = url_or_none(format_dict.get('url'))
                if not format_url:
                    continue
                # The last usable format's duration is the one reported for
                # the pin (scale=1000: presumably milliseconds - confirm).
                duration = float_or_none(format_dict.get('duration'), scale=1000)
                ext = determine_ext(format_url)
                if 'hls' in format_id.lower() or ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'width': int_or_none(format_dict.get('width')),
                        'height': int_or_none(format_dict.get('height')),
                        'duration': duration,
                    })
            self._sort_formats(
                formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        description = data.get('description') or data.get('description_html') or data.get('seo_description')
        timestamp = unified_timestamp(data.get('created_at'))

        def _u(field):
            # Uploader details are read from the 'closeup_attribution' object.
            return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)

        uploader = _u('full_name')
        uploader_id = _u('id')

        repost_count = int_or_none(data.get('repin_count'))
        comment_count = int_or_none(data.get('comment_count'))
        categories = try_get(data, lambda x: x['pin_join']['visual_annotation'], list)
        tags = data.get('hashtags')

        thumbnails = []
        images = data.get('images')
        if isinstance(images, dict):
            # Only the image descriptors are needed; the mapping keys are unused.
            for thumbnail in images.values():
                if not isinstance(thumbnail, dict):
                    continue
                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'width': int_or_none(thumbnail.get('width')),
                    'height': int_or_none(thumbnail.get('height')),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'thumbnails': thumbnails,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'repost_count': repost_count,
            'comment_count': comment_count,
            'categories': categories,
            'tags': tags,
            'formats': formats,
            # Entries produced for a board are still attributed to the pin extractor.
            'extractor_key': PinterestIE.ie_key(),
        }
class PinterestIE(PinterestBaseIE):
    """Extractor for a single Pinterest pin page (``/pin/<id>``)."""

    _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.com/pin/664281013778109217/',
        'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
        'info_dict': {
            'id': '664281013778109217',
            'ext': 'mp4',
            'title': 'Origami',
            'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
            'duration': 57.7,
            'timestamp': 1593073622,
            'upload_date': '20200625',
            'uploader': 'Love origami -I am Dafei',
            'uploader_id': '586523688879454212',
            'repost_count': 50,
            'comment_count': 0,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://co.pinterest.com/pin/824721750502199491/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the pin page and extract the video from its first resource response."""
        pin_id = self._match_id(url)
        page = self._download_webpage(url, pin_id)
        first_resource = self._extract_resource(page, pin_id)[0]
        return self._extract_video(first_resource['response']['data'])
class PinterestCollectionIE(PinterestBaseIE):
    """Extractor that turns a Pinterest board page into a playlist of its pins."""

    _VALID_URL = r'%s/[^/]+/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
        'info_dict': {
            'id': '585890301462791043',
            'title': 'cool diys',
        },
        'playlist_count': 8,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that PinterestIE accepts; otherwise use the default check."""
        if PinterestIE.suitable(url):
            return False
        return super(PinterestCollectionIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect every item of type 'pin' from the board page into a playlist."""
        board_slug = self._match_id(url)
        page = self._download_webpage(url, board_slug)
        # The board contents are read from the second resource response.
        board = self._extract_resource(page, board_slug)[1]

        # Pins are extracted directly from the board payload instead of being
        # delegated to their pin URL, because some pins may not be available
        # anonymously via the pin URL.
        entries = [
            self._extract_video(pin)
            for pin in board['response']['data']
            if isinstance(pin, dict)
            and pin.get('type') == 'pin'
            and pin.get('id')]

        board_title = try_get(
            board, lambda x: x['options']['board_title'], compat_str)
        # Fall back to the URL slug when the payload carries no board id.
        board_id = try_get(
            board, lambda x: x['options']['board_id'],
            compat_str) or board_slug

        return self.playlist_result(
            entries, playlist_id=board_id, playlist_title=board_title)

View File

@ -1,3 +1,4 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -15,9 +16,9 @@ from ..utils import (
GeoRestrictedError, GeoRestrictedError,
int_or_none, int_or_none,
parse_duration, parse_duration,
remove_start,
strip_or_none, strip_or_none,
try_get, try_get,
unescapeHTML,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -67,7 +68,7 @@ class RaiBaseIE(InfoExtractor):
# This does not imply geo restriction (e.g. # This does not imply geo restriction (e.g.
# http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
if media_url == 'http://download.rai.it/video_no_available.mp4': if '/video_no_available.mp4' in media_url:
continue continue
ext = determine_ext(media_url) ext = determine_ext(media_url)
@ -122,40 +123,20 @@ class RaiBaseIE(InfoExtractor):
class RaiPlayIE(RaiBaseIE): class RaiPlayIE(RaiBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s))\.(?:html|json)' % RaiBaseIE._UUID_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
'md5': '340aa3b7afb54bfd14a8c11786450d76',
'info_dict': {
'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
'ext': 'mp4',
'title': 'La Casa Bianca',
'alt_title': 'S2016 - Puntata del 23/10/2016',
'description': 'md5:a09d45890850458077d1f68bb036e0a5',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 3',
'creator': 'Rai 3',
'duration': 3278,
'timestamp': 1477764300,
'upload_date': '20161029',
'series': 'La Casa Bianca',
'season': '2016',
},
}, {
'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
'info_dict': { 'info_dict': {
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Report del 07/04/2014', 'title': 'Report del 07/04/2014',
'alt_title': 'S2013/14 - Puntata del 07/04/2014', 'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 5', 'uploader': 'Rai Gulp',
'creator': 'Rai 5',
'duration': 6160, 'duration': 6160,
'series': 'Report', 'series': 'Report',
'season_number': 5,
'season': '2013/14', 'season': '2013/14',
}, },
'params': { 'params': {
@ -167,48 +148,52 @@ class RaiPlayIE(RaiBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) base, video_id = re.match(self._VALID_URL, url).groups()
url, video_id = mobj.group('url', 'id')
media = self._download_json( media = self._download_json(
'%s?json' % url, video_id, 'Downloading video JSON') base + '.json', video_id, 'Downloading video JSON')
title = media['name'] title = media['name']
video = media['video'] video = media['video']
relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) relinker_info = self._extract_relinker_info(video['content_url'], video_id)
self._sort_formats(relinker_info['formats']) self._sort_formats(relinker_info['formats'])
thumbnails = [] thumbnails = []
if 'images' in media: for _, value in media.get('images', {}).items():
for _, value in media.get('images').items(): if value:
if value: thumbnails.append({
thumbnails.append({ 'url': urljoin(url, value),
'url': value.replace('[RESOLUTION]', '600x400') })
})
timestamp = unified_timestamp(try_get( date_published = media.get('date_published')
media, lambda x: x['availabilities'][0]['start'], compat_str)) time_published = media.get('time_published')
if date_published and time_published:
date_published += ' ' + time_published
subtitles = self._extract_subtitles(url, video.get('subtitles')) subtitles = self._extract_subtitles(url, video.get('subtitles'))
program_info = media.get('program_info') or {}
season = media.get('season')
info = { info = {
'id': video_id, 'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
'display_id': video_id,
'title': self._live_title(title) if relinker_info.get( 'title': self._live_title(title) if relinker_info.get(
'is_live') else title, 'is_live') else title,
'alt_title': media.get('subtitle'), 'alt_title': strip_or_none(media.get('subtitle')),
'description': media.get('description'), 'description': media.get('description'),
'uploader': strip_or_none(media.get('channel')), 'uploader': strip_or_none(media.get('channel')),
'creator': strip_or_none(media.get('editor')), 'creator': strip_or_none(media.get('editor') or None),
'duration': parse_duration(video.get('duration')), 'duration': parse_duration(video.get('duration')),
'timestamp': timestamp, 'timestamp': unified_timestamp(date_published),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'series': try_get( 'series': program_info.get('name'),
media, lambda x: x['isPartOf']['name'], compat_str), 'season_number': int_or_none(season),
'season_number': int_or_none(try_get( 'season': season if (season and not season.isdigit()) else None,
media, lambda x: x['isPartOf']['numeroStagioni'])), 'episode': media.get('episode_title'),
'season': media.get('stagione') or None, 'episode_number': int_or_none(media.get('episode')),
'subtitles': subtitles, 'subtitles': subtitles,
} }
@ -216,16 +201,16 @@ class RaiPlayIE(RaiBaseIE):
return info return info
class RaiPlayLiveIE(RaiBaseIE): class RaiPlayLiveIE(RaiPlayIE):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))'
_TEST = { _TESTS = [{
'url': 'http://www.raiplay.it/dirette/rainews24', 'url': 'http://www.raiplay.it/dirette/rainews24',
'info_dict': { 'info_dict': {
'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
'display_id': 'rainews24', 'display_id': 'rainews24',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:6eca31500550f9376819f174e5644754', 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497',
'uploader': 'Rai News 24', 'uploader': 'Rai News 24',
'creator': 'Rai News 24', 'creator': 'Rai News 24',
'is_live': True, 'is_live': True,
@ -233,58 +218,50 @@ class RaiPlayLiveIE(RaiBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
} }]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE,
webpage, 'content id')
return {
'_type': 'url_transparent',
'ie_key': RaiPlayIE.ie_key(),
'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id,
'id': video_id,
'display_id': display_id,
}
class RaiPlayPlaylistIE(InfoExtractor): class RaiPlayPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
'info_dict': { 'info_dict': {
'id': 'nondirloalmiocapo', 'id': 'nondirloalmiocapo',
'title': 'Non dirlo al mio capo', 'title': 'Non dirlo al mio capo',
'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b',
}, },
'playlist_mincount': 12, 'playlist_mincount': 12,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) base, playlist_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, playlist_id) program = self._download_json(
base + '.json', playlist_id, 'Downloading program JSON')
title = self._html_search_meta(
('programma', 'nomeProgramma'), webpage, 'title')
description = unescapeHTML(self._html_search_meta(
('description', 'og:description'), webpage, 'description'))
entries = [] entries = []
for mobj in re.finditer( for b in (program.get('blocks') or []):
r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', for s in (b.get('sets') or []):
webpage): s_id = s.get('id')
video_url = urljoin(url, mobj.group('path')) if not s_id:
entries.append(self.url_result( continue
video_url, ie=RaiPlayIE.ie_key(), medias = self._download_json(
video_id=RaiPlayIE._match_id(video_url))) '%s/%s.json' % (base, s_id), s_id,
'Downloading content set JSON', fatal=False)
if not medias:
continue
for m in (medias.get('items') or []):
path_id = m.get('path_id')
if not path_id:
continue
video_url = urljoin(url, path_id)
entries.append(self.url_result(
video_url, ie=RaiPlayIE.ie_key(),
video_id=RaiPlayIE._match_id(video_url)))
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(
entries, playlist_id, program.get('name'),
try_get(program, lambda x: x['program_info']['description']))
class RaiIE(RaiBaseIE): class RaiIE(RaiBaseIE):
@ -300,7 +277,8 @@ class RaiIE(RaiBaseIE):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1758, 'duration': 1758,
'upload_date': '20140612', 'upload_date': '20140612',
} },
'skip': 'This content is available only in Italy',
}, { }, {
# with ContentItem in many metas # with ContentItem in many metas
'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html',
@ -316,7 +294,7 @@ class RaiIE(RaiBaseIE):
}, { }, {
# with ContentItem in og:url # with ContentItem in og:url
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
'md5': '11959b4e44fa74de47011b5799490adf', 'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
'info_dict': { 'info_dict': {
'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
'ext': 'mp4', 'ext': 'mp4',
@ -326,18 +304,6 @@ class RaiIE(RaiBaseIE):
'duration': 2214, 'duration': 2214,
'upload_date': '20161103', 'upload_date': '20161103',
} }
}, {
# drawMediaRaiTV(...)
'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html',
'md5': '2dd727e61114e1ee9c47f0da6914e178',
'info_dict': {
'id': '59d69d28-6bb6-409d-a4b5-ed44096560af',
'ext': 'mp4',
'title': 'Il pacco',
'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20141221',
},
}, { }, {
# initEdizione('ContentItem-...' # initEdizione('ContentItem-...'
'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined',
@ -349,17 +315,6 @@ class RaiIE(RaiBaseIE):
'upload_date': '20170401', 'upload_date': '20170401',
}, },
'skip': 'Changes daily', 'skip': 'Changes daily',
}, {
# HDS live stream with only relinker URL
'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews',
'info_dict': {
'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc',
'ext': 'flv',
'title': 'EuroNews',
},
'params': {
'skip_download': True,
},
}, { }, {
# HLS live stream with ContentItem in og:url # HLS live stream with ContentItem in og:url
'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html',
@ -469,7 +424,7 @@ class RaiIE(RaiBaseIE):
except ExtractorError: except ExtractorError:
pass pass
relinker_url = self._search_regex( relinker_url = self._proto_relative_url(self._search_regex(
r'''(?x) r'''(?x)
(?: (?:
var\s+videoURL| var\s+videoURL|
@ -481,7 +436,7 @@ class RaiIE(RaiBaseIE):
//mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
(?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
''', ''',
webpage, 'relinker URL', group='url') webpage, 'relinker URL', group='url'))
relinker_info = self._extract_relinker_info( relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id) urljoin(url, relinker_url), video_id)

View File

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
try_get,
)
class RumbleEmbedIE(InfoExtractor):
    """Extractor for Rumble embed URLs (``rumble.com/embed/<id>``).

    Metadata and media URLs are read from the ``embedJS`` JSON endpoint.
    """
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the embed JSON for *url* and build the info dict."""
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        # A missing title is fatal (KeyError), everything else is optional.
        title = video['title']

        formats = []
        # 'ua' is keyed by height; each value is indexed positionally below.
        # NOTE(review): presumably entries 0/1 are URLs and entries 2/3 are
        # the matching metadata dicts -- confirm against a live response.
        for height, ua in (video.get('ua') or {}).items():
            for idx in (0, 1):
                f_url = try_get(ua, lambda x: x[idx], compat_str)
                if not f_url:
                    continue
                ext = determine_ext(f_url)
                fmt = {
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                    'url': f_url,
                }
                # The bitrate is looked up two positions after the URL.
                bitrate = try_get(ua, lambda x: x[idx + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        author = video.get('author') or {}

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('i'),
            'timestamp': parse_iso8601(video.get('pubDate')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
            'duration': int_or_none(video.get('duration')),
        }

View File

@ -1,9 +1,15 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
unified_timestamp,
urlencode_postdata,
url_or_none,
)
class ServusIE(InfoExtractor): class ServusIE(InfoExtractor):
@ -12,20 +18,29 @@ class ServusIE(InfoExtractor):
(?:www\.)? (?:www\.)?
(?: (?:
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)| servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
servustv\.com/videos (?:servustv|pm-wissen)\.com/videos
) )
/(?P<id>[aA]{2}-\w+|\d+-\d+) /(?P<id>[aA]{2}-\w+|\d+-\d+)
''' '''
_TESTS = [{ _TESTS = [{
# new URL schema # new URL schema
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/', 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', 'md5': '60474d4c21f3eb148838f215c37f02b9',
'info_dict': { 'info_dict': {
'id': 'AA-1T6VBU5PW1W12', 'id': 'AA-1T6VBU5PW1W12',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Die Grünen aus Sicht des Volkes', 'title': 'Die Grünen aus Sicht des Volkes',
'alt_title': 'Talk im Hangar-7 Voxpops Gruene',
'description': 'md5:1247204d85783afe3682644398ff2ec4', 'description': 'md5:1247204d85783afe3682644398ff2ec4',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 62.442,
'timestamp': 1605193976,
'upload_date': '20201112',
'series': 'Talk im Hangar-7',
'season': 'Season 9',
'season_number': 9,
'episode': 'Episode 31 - September 14',
'episode_number': 31,
} }
}, { }, {
# old URL schema # old URL schema
@ -40,30 +55,94 @@ class ServusIE(InfoExtractor):
}, { }, {
'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/', 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).upper() video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
title = self._search_regex( token = self._download_json(
(r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', 'https://auth.redbullmediahouse.com/token', video_id,
r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'), 'Downloading token', data=urlencode_postdata({
webpage, 'title', default=None, 'grant_type': 'client_credentials',
group='title') or self._og_search_title(webpage) }), headers={
title = re.sub(r'\s*-\s*Servus TV\s*$', '', title) 'Authorization': 'Basic SVgtMjJYNEhBNFdEM1cxMTpEdDRVSkFLd2ZOMG5IMjB1NGFBWTBmUFpDNlpoQ1EzNA==',
description = self._og_search_description(webpage) })
thumbnail = self._og_search_thumbnail(webpage) access_token = token['access_token']
token_type = token.get('token_type', 'Bearer')
formats = self._extract_m3u8_formats( video = self._download_json(
'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id, 'https://sparkle-api.liiift.io/api/v1/stv/channels/international/assets/%s' % video_id,
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') video_id, 'Downloading video JSON', headers={
'Authorization': '%s %s' % (token_type, access_token),
})
formats = []
thumbnail = None
for resource in video['resources']:
if not isinstance(resource, dict):
continue
format_url = url_or_none(resource.get('url'))
if not format_url:
continue
extension = resource.get('extension')
type_ = resource.get('type')
if extension == 'jpg' or type_ == 'reference_keyframe':
thumbnail = format_url
continue
ext = determine_ext(format_url)
if type_ == 'dash' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
elif type_ == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif extension == 'mp4' or ext == 'mp4':
formats.append({
'url': format_url,
'format_id': type_,
'width': int_or_none(resource.get('width')),
'height': int_or_none(resource.get('height')),
})
self._sort_formats(formats) self._sort_formats(formats)
attrs = {}
for attribute in video['attributes']:
if not isinstance(attribute, dict):
continue
key = attribute.get('fieldKey')
value = attribute.get('fieldValue')
if not key or not value:
continue
attrs[key] = value
title = attrs.get('title_stv') or video_id
alt_title = attrs.get('title')
description = attrs.get('long_description') or attrs.get('short_description')
series = attrs.get('label')
season = attrs.get('season')
episode = attrs.get('chapter')
duration = float_or_none(attrs.get('duration'), scale=1000)
season_number = int_or_none(self._search_regex(
r'Season (\d+)', season or '', 'season number', default=None))
episode_number = int_or_none(self._search_regex(
r'Episode (\d+)', episode or '', 'episode number', default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'alt_title': alt_title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration,
'timestamp': unified_timestamp(video.get('lastPublished')),
'series': series,
'season': season,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'formats': formats, 'formats': formats,
} }

View File

@ -0,0 +1,239 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
dict_get,
int_or_none,
parse_duration,
unified_timestamp,
)
class SkyItPlayerIE(InfoExtractor):
    """Extractor for the Sky Italia player (``player.sky.it``).

    Also serves as the base class for the other Sky Italia extractors in
    this file, which delegate to the player URL built from ``_PLAYER_TMPL``
    and their own ``_DOMAIN``.
    """
    IE_NAME = 'player.sky.it'
    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
    _GEO_BYPASS = False
    _DOMAIN = 'sky'
    _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
    # http://static.sky.it/static/skyplayer/conf.json
    _TOKEN_MAP = {
        'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
        'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
        'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
        'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
        'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
        'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
        'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
        'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
        'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
    }

    def _player_url_result(self, video_id):
        """Return a url_result pointing at the player page for *video_id*."""
        player_url = self._PLAYER_TMPL % (video_id, self._DOMAIN)
        return self.url_result(player_url, SkyItPlayerIE.ie_key(), video_id)

    def _parse_video(self, video, video_id):
        """Turn a video/livestream JSON object into an info dict."""
        title = video['title']
        is_live = video.get('type') == 'live'
        # Live and on-demand payloads use different key names for the
        # stream URL and for the geo-block flag.
        if is_live:
            url_key, geo_key = 'streaming_url', 'geoblock'
        else:
            url_key, geo_key = 'hls_url', 'geob'
        hls_url = video.get(url_key)
        if not hls_url and video.get(geo_key):
            self.raise_geo_restricted(countries=['IT'])

        if is_live:
            formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
        else:
            formats = self._extract_akamai_formats(
                hls_url, video_id, {'http': 'videoplatform.sky.it'})
        self._sort_formats(formats)

        # Prefer the numeric duration; fall back to parsing the text form.
        duration = int_or_none(video.get('duration_sec')) or parse_duration(
            video.get('duration'))

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
            'description': video.get('short_desc') or None,
            'timestamp': unified_timestamp(video.get('create_date')),
            'duration': duration,
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Fetch video data for a player URL using the domain's token."""
        video_id = self._match_id(url)
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        domain = qs.get('domain', [None])[0]
        # Unknown or missing domains fall back to the 'sky' token.
        token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
        video = self._download_json(
            'https://apid.sky.it/vdp/v1/getVideoData',
            video_id, query={
                'caller': 'sky',
                'id': video_id,
                'token': token
            }, headers=self.geo_verification_headers())
        return self._parse_video(video, video_id)
class SkyItVideoIE(SkyItPlayerIE):
    """Extractor for video pages whose URL ends in the numeric video id."""
    IE_NAME = 'video.sky.it'
    _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        }
    }, {
        'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
        'only_matching': True,
    }, {
        'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the player extractor using the id from the URL."""
        return self._player_url_result(self._match_id(url))
class SkyItVideoLiveIE(SkyItPlayerIE):
    """Extractor for ``video.sky.it/diretta/<channel>`` live pages."""
    IE_NAME = 'video.sky.it:live'
    _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://video.sky.it/diretta/tg24',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the asset id from the page's Next.js data, then fetch the stream."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        next_data = self._parse_json(self._search_regex(
            r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
            webpage, 'next data'), display_id)
        live_content = next_data['props']['initialState']['livePage']['content']
        # The id is coerced to a string before being used as query value/id.
        asset_id = compat_str(live_content['asset_id'])
        livestream = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream',
            asset_id, query={'id': asset_id})
        return self._parse_video(livestream, asset_id)
class SkyItIE(SkyItPlayerIE):
    """Extractor for dated article pages on sport.sky.it and tg24.sky.it.

    Subclasses reuse ``_real_extract`` and override ``_VIDEO_ID_REGEX``
    (and ``_DOMAIN``) for their own page markup.
    """
    IE_NAME = 'sky.it'
    _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # Pattern that captures the numeric video id from the article HTML.
    _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
    _TESTS = [{
        'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
        'info_dict': {
            'id': '631201',
            'ext': 'mp4',
            'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
            'upload_date': '20201121',
            'timestamp': 1605995753,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        },
    }]

    def _real_extract(self, url):
        """Find the video id in the page and hand off to the player extractor."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        return self._player_url_result(self._search_regex(
            self._VIDEO_ID_REGEX, page, 'video id'))
class SkyItAcademyIE(SkyItIE):
    """skyacademy.it article pages; only the domain and id pattern differ."""
    IE_NAME = 'skyacademy.it'
    _DOMAIN = 'skyacademy'
    _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
    _TESTS = [{
        'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
        'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
        'info_dict': {
            'id': '523458',
            'ext': 'mp4',
            'title': 'Sky Academy "The Best CineCamp 2019"',
            'timestamp': 1562843784,
            'upload_date': '20190711',
        }
    }]
class SkyItArteIE(SkyItIE):
    """arte.sky.it video pages; the id is taken from the player iframe URL."""
    IE_NAME = 'arte.sky.it'
    _DOMAIN = 'skyarte'
    _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
    _TESTS = [{
        'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
        'md5': '515aee97b87d7a018b6c80727d3e7e17',
        'info_dict': {
            'id': '627926',
            'ext': 'mp4',
            'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
            'upload_date': '20201106',
            'timestamp': 1604664493,
        }
    }]
class CieloTVItIE(SkyItIE):
    """cielotv.it video pages; only the domain and id pattern differ."""
    IE_NAME = 'cielotv.it'
    _DOMAIN = 'cielo'
    _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
    _TESTS = [{
        'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
        'md5': 'c4deed77552ba901c2a0d9258320304b',
        'info_dict': {
            'id': '499240',
            'ext': 'mp4',
            'title': 'Il lunedì è sempre un dramma',
            'upload_date': '20190329',
            'timestamp': 1553862178,
        }
    }]
class TV8ItIE(SkyItVideoIE):
    """tv8.it ``showvideo`` pages; the numeric id is taken from the URL."""
    IE_NAME = 'tv8.it'
    _DOMAIN = 'mtv8'
    _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
        'md5': '9ab906a3f75ea342ed928442f9dabd21',
        'info_dict': {
            'id': '630529',
            'ext': 'mp4',
            'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
            'timestamp': 1605721374,
            'upload_date': '20201118',
        }
    }]

View File

@ -558,7 +558,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
# Per the SoundCloud documentation, the maximum limit for a linked partioning query is 200. # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated # https://developers.soundcloud.com/blog/offset-pagination-deprecated
COMMON_QUERY = { COMMON_QUERY = {
'limit': 200, 'limit': 200,

View File

@ -1,159 +1,54 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .nexx import ( from .jwplatform import JWPlatformIE
NexxIE,
NexxEmbedIE,
)
from .spiegeltv import SpiegeltvIE
from ..compat import compat_urlparse
from ..utils import (
parse_duration,
strip_or_none,
unified_timestamp,
)
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': 'b57399839d055fccfeb9a0455c439868', 'md5': '50c7948883ec85a3e431a0a44b7ad1d6',
'info_dict': { 'info_dict': {
'id': '563747', 'id': 'II0BUyxY',
'display_id': '1259285',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', 'title': 'Vulkan Tungurahua in Ecuador ist wieder aktiv - DER SPIEGEL - Wissenschaft',
'description': 'md5:8029d8310232196eb235d27575a8b9f4', 'description': 'md5:8029d8310232196eb235d27575a8b9f4',
'duration': 49, 'duration': 48.0,
'upload_date': '20130311', 'upload_date': '20130311',
'timestamp': 1362994320, 'timestamp': 1362997920,
}, },
}, { }, {
'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html',
'md5': '5b6c2f4add9d62912ed5fc78a1faed80',
'info_dict': {
'id': '580988',
'ext': 'mp4',
'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers',
'description': 'md5:c2322b65e58f385a820c10fa03b2d088',
'duration': 983,
'upload_date': '20131115',
'timestamp': 1384546642,
},
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html',
'md5': '97b91083a672d72976faa8433430afb9',
'info_dict': {
'id': '601883',
'ext': 'mp4',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
'upload_date': '20140904',
'timestamp': 1409834160,
}
}, {
'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
'only_matching': True, 'only_matching': True,
}, { }, {
# nexx video 'url': 'https://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html',
'only_matching': True,
}, {
'url': 'https://www.spiegel.de/panorama/urteile-im-goldmuenzenprozess-haftstrafen-fuer-clanmitglieder-a-aae8df48-43c1-4c61-867d-23f0a2d254b7',
'only_matching': True,
}, {
'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html', 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html',
'only_matching': True, 'only_matching': True,
}] }, {
def _real_extract(self, url):
video_id = self._match_id(url)
metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id
handle = self._request_webpage(metadata_url, video_id)
# 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
if SpiegeltvIE.suitable(handle.geturl()):
return self.url_result(handle.geturl(), 'Spiegeltv')
video_data = self._parse_json(self._webpage_read_content(
handle, metadata_url, video_id), video_id)
title = video_data['title']
nexx_id = video_data['nexxOmniaId']
domain_id = video_data.get('nexxOmniaDomain') or '748'
return {
'_type': 'url_transparent',
'id': video_id,
'url': 'nexx:%s:%s' % (domain_id, nexx_id),
'title': title,
'description': strip_or_none(video_data.get('teaser')),
'duration': parse_duration(video_data.get('duration')),
'timestamp': unified_timestamp(video_data.get('datum')),
'ie_key': NexxIE.ie_key(),
}
class SpiegelArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
IE_NAME = 'Spiegel:Article'
IE_DESC = 'Articles on spiegel.de'
_TESTS = [{
'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html', 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html',
'info_dict': { 'only_matching': True,
'id': '1516455',
'ext': 'mp4',
'title': 'Faszination Badminton: Nennt es bloß nicht Federball',
'description': 're:^Patrick Kämnitz gehört.{100,}',
'upload_date': '20140825',
},
}, {
'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html',
'info_dict': {
},
'playlist_count': 6,
}, {
# Nexx iFrame embed
'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
'info_dict': {
'id': '161464',
'ext': 'mp4',
'title': 'Nervenkitzel Achterbahn',
'alt_title': 'Karussellbauer in Deutschland',
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
'release_year': 2005,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2761,
'timestamp': 1394021479,
'upload_date': '20140305',
},
'params': {
'format': 'bestvideo',
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
media_id = self._html_search_regex(
# Single video on top of the page r'(&#34;|["\'])mediaId\1\s*:\s*(&#34;|["\'])(?P<id>(?:(?!\2).)+)\2',
video_link = self._search_regex( webpage, 'media id', group='id')
r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage, return {
'video page URL', default=None) '_type': 'url_transparent',
if video_link: 'id': video_id,
video_url = compat_urlparse.urljoin( 'display_id': video_id,
self.http_scheme() + '//spiegel.de/', video_link) 'url': 'jwplatform:%s' % media_id,
return self.url_result(video_url) 'title': self._og_search_title(webpage, default=None),
'ie_key': JWPlatformIE.ie_key(),
# Multiple embedded videos }
embeds = re.findall(
r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"',
webpage)
entries = [
self.url_result(compat_urlparse.urljoin(
self.http_scheme() + '//spiegel.de/', embed_path))
for embed_path in embeds]
if embeds:
return self.playlist_result(entries)
return self.playlist_from_matches(
NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key())

View File

@ -1,17 +0,0 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from .nexx import NexxIE
class SpiegeltvIE(InfoExtractor):
    # Matches spiegel.tv video pages; the numeric id in the URL is handed
    # over to the Nexx extractor, so no page is downloaded here.
    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
        'only_matching': True,
    }

    def _real_extract(self, url):
        # Build the Nexx API URL from the numeric id captured by _VALID_URL
        nexx_video_id = self._match_id(url)
        nexx_api_url = (
            'https://api.nexx.cloud/v3/748/videos/byid/%s' % nexx_video_id)
        return self.url_result(nexx_api_url, ie=NexxIE.ie_key())

View File

@ -9,6 +9,7 @@ from ..utils import (
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
unified_timestamp,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
try_get, try_get,
@ -44,7 +45,8 @@ class SVTBaseIE(InfoExtractor):
'format_id': player_type, 'format_id': player_type,
'url': vurl, 'url': vurl,
}) })
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): rights = try_get(video_info, lambda x: x['rights'], dict) or {}
if not formats and rights.get('geoBlockedSweden'):
self.raise_geo_restricted( self.raise_geo_restricted(
'This video is only available in Sweden', 'This video is only available in Sweden',
countries=self._GEO_COUNTRIES) countries=self._GEO_COUNTRIES)
@ -70,6 +72,7 @@ class SVTBaseIE(InfoExtractor):
episode = video_info.get('episodeTitle') episode = video_info.get('episodeTitle')
episode_number = int_or_none(video_info.get('episodeNumber')) episode_number = int_or_none(video_info.get('episodeNumber'))
timestamp = unified_timestamp(rights.get('validFrom'))
duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
age_limit = None age_limit = None
adult = dict_get( adult = dict_get(
@ -84,6 +87,7 @@ class SVTBaseIE(InfoExtractor):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': duration, 'duration': duration,
'timestamp': timestamp,
'age_limit': age_limit, 'age_limit': age_limit,
'series': series, 'series': series,
'season_number': season_number, 'season_number': season_number,
@ -136,26 +140,39 @@ class SVTPlayIE(SVTPlayBaseIE):
IE_DESC = 'SVT Play and Öppet arkiv' IE_DESC = 'SVT Play and Öppet arkiv'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
svt:(?P<svt_id>[^/?#&]+)| (?:
svt:|
https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
)
(?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+) https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
) )
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', 'url': 'https://www.svtplay.se/video/26194546/det-har-ar-himlen',
'md5': '2b6704fe4a28801e1a098bbf3c5ac611', 'md5': '2382036fd6f8c994856c323fe51c426e',
'info_dict': { 'info_dict': {
'id': '5996901', 'id': 'jNwpV9P',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Flygplan till Haile Selassie', 'title': 'Det här är himlen',
'duration': 3527, 'timestamp': 1586044800,
'thumbnail': r're:^https?://.*[\.-]jpg$', 'upload_date': '20200405',
'duration': 3515,
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
'age_limit': 0, 'age_limit': 0,
'subtitles': { 'subtitles': {
'sv': [{ 'sv': [{
'ext': 'wsrt', 'ext': 'vtt',
}] }]
}, },
}, },
'params': {
'format': 'bestvideo',
# skip for now due to download test asserts that segment is > 10000 bytes and svt uses
# init segments that are smaller
# AssertionError: Expected test_SVTPlay_jNwpV9P.mp4 to be at least 9.77KiB, but it's only 864.00B
'skip_download': True,
},
}, { }, {
# geo restricted to Sweden # geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -172,6 +189,12 @@ class SVTPlayIE(SVTPlayBaseIE):
}, { }, {
'url': 'svt:14278044', 'url': 'svt:14278044',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.svt.se/barnkanalen/barnplay/kar/eWv5MLX/',
'only_matching': True,
}, {
'url': 'svt:eWv5MLX',
'only_matching': True,
}] }]
def _adjust_title(self, info): def _adjust_title(self, info):
@ -236,7 +259,10 @@ class SVTPlayIE(SVTPlayBaseIE):
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'), r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
webpage, 'video id') webpage, 'video id')
return self._extract_by_video_id(svt_id, webpage) info_dict = self._extract_by_video_id(svt_id, webpage)
info_dict['thumbnail'] = thumbnail
return info_dict
class SVTSeriesIE(SVTPlayBaseIE): class SVTSeriesIE(SVTPlayBaseIE):
@ -360,7 +386,7 @@ class SVTPageIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url) return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = re.match(self._VALID_URL, url).groups() path, display_id = re.match(self._VALID_URL, url).groups()

View File

@ -86,7 +86,7 @@ class TagesschauPlayerIE(InfoExtractor):
# return self._extract_via_api(kind, video_id) # return self._extract_via_api(kind, video_id)
# JSON api does not provide some audio formats (e.g. ogg) thus # JSON api does not provide some audio formats (e.g. ogg) thus
# extractiong audio via webpage # extracting audio via webpage
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

View File

@ -208,7 +208,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if m: if m:
return [m.group('url')] return [m.group('url')]
# Are whitesapces ignored in URLs? # Are whitespaces ignored in URLs?
# https://github.com/ytdl-org/youtube-dl/issues/12044 # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall( matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage) r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)

View File

@ -56,9 +56,9 @@ class TurnerBaseIE(AdobePassIE):
content_id = xpath_text(video_data, 'contentId') or video_id content_id = xpath_text(video_data, 'contentId') or video_id
# rtmp_src = xpath_text(video_data, 'akamai/src') # rtmp_src = xpath_text(video_data, 'akamai/src')
# if rtmp_src: # if rtmp_src:
# splited_rtmp_src = rtmp_src.split(',') # split_rtmp_src = rtmp_src.split(',')
# if len(splited_rtmp_src) == 2: # if len(split_rtmp_src) == 2:
# rtmp_src = splited_rtmp_src[1] # rtmp_src = split_rtmp_src[1]
# aifp = xpath_text(video_data, 'akamai/aifp', default='') # aifp = xpath_text(video_data, 'akamai/aifp', default='')
urls = [] urls = []

View File

@ -2,7 +2,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unified_timestamp from ..utils import (
dict_get,
int_or_none,
unified_timestamp,
)
class URPlayIE(InfoExtractor): class URPlayIE(InfoExtractor):
@ -15,8 +19,8 @@ class URPlayIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
'timestamp': 1513512768, 'timestamp': 1513292400,
'upload_date': '20171217', 'upload_date': '20171214',
}, },
}, { }, {
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
@ -25,7 +29,7 @@ class URPlayIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde', 'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
'timestamp': 1440093600, 'timestamp': 1440086400,
'upload_date': '20150820', 'upload_date': '20150820',
}, },
}, { }, {
@ -35,37 +39,58 @@ class URPlayIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url = url.replace('skola.se/Produkter', 'play.se/program')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
urplayer_data = self._parse_json(self._search_regex( urplayer_data = self._parse_json(self._html_search_regex(
r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"',
host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] webpage, 'urplayer data'), video_id)['currentProduct']
episode = urplayer_data['title']
raw_streaming_info = urplayer_data['streamingInfo']['raw']
host = self._download_json(
'http://streaming-loadbalancer.ur.se/loadbalancer.json',
video_id)['redirect']
formats = [] formats = []
for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): for k, v in raw_streaming_info.items():
file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) if not (k in ('sd', 'hd') and isinstance(v, dict)):
continue
file_http = v.get('location')
if file_http: if file_http:
formats.extend(self._extract_wowza_formats( formats.extend(self._extract_wowza_formats(
'http://%s/%splaylist.m3u8' % (host, file_http), video_id, skip_protocols=['rtmp', 'rtsp'])) 'http://%s/%splaylist.m3u8' % (host, file_http),
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} image = urplayer_data.get('image') or {}
for subtitle in urplayer_data.get('subtitles', []): thumbnails = []
subtitle_url = subtitle.get('file') for k, v in image.items():
kind = subtitle.get('kind') t = {
if not subtitle_url or (kind and kind != 'captions'): 'id': k,
continue 'url': v,
subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ }
'url': subtitle_url, wh = k.split('x')
}) if len(wh) == 2:
t.update({
'width': int_or_none(wh[0]),
'height': int_or_none(wh[1]),
})
thumbnails.append(t)
series = urplayer_data.get('series') or {}
series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle'))
return { return {
'id': video_id, 'id': video_id,
'title': urplayer_data['title'], 'title': '%s : %s' % (series_title, episode) if series_title else episode,
'description': self._og_search_description(webpage), 'description': urplayer_data.get('description'),
'thumbnail': urplayer_data.get('image'), 'thumbnails': thumbnails,
'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), 'timestamp': unified_timestamp(urplayer_data.get('publishedAt')),
'series': urplayer_data.get('series_title'), 'series': series_title,
'subtitles': subtitles,
'formats': formats, 'formats': formats,
'duration': int_or_none(urplayer_data.get('duration')),
'categories': urplayer_data.get('categories'),
'tags': urplayer_data.get('keywords'),
'season': series.get('label'),
'episode': episode,
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
} }

View File

@ -1,74 +1,24 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .adobepass import AdobePassIE from .nbc import NBCIE
from ..utils import (
NO_DEFAULT,
smuggle_url,
update_url_query,
)
class USANetworkIE(AdobePassIE): class USANetworkIE(NBCIE):
_VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)' _VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/[^/]+/video/[^/]+/(?P<id>\d+))'
_TEST = { _TESTS = [{
'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity', 'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
'md5': '33c0d2ba381571b414024440d08d57fd',
'info_dict': { 'info_dict': {
'id': '3086229', 'id': '4185302',
'ext': 'mp4', 'ext': 'mp4',
'title': 'HPE Cybersecurity', 'title': 'Intelligence (Trailer)',
'description': 'The more we digitize our world, the more vulnerable we are.', 'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
'upload_date': '20160818', 'upload_date': '20200715',
'timestamp': 1471535460, 'timestamp': 1594785600,
'uploader': 'NBCU-USA', 'uploader': 'NBCU-MPAT',
}, },
} 'params': {
# m3u8 download
def _real_extract(self, url): 'skip_download': True,
display_id = self._match_id(url) },
webpage = self._download_webpage(url, display_id) }]
def _x(name, default=NO_DEFAULT):
return self._search_regex(
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
webpage, name, default=default, group='value')
video_id = _x('mpx-guid')
title = _x('episode-title')
mpx_account_id = _x('mpx-account-id', '2304992029')
query = {
'mbr': 'true',
}
if _x('is-full-episode', None) == '1':
query['manifest'] = 'm3u'
if _x('is-entitlement', None) == '1':
adobe_pass = {}
drupal_settings = self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
webpage, 'drupal settings', fatal=False)
if drupal_settings:
drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
if drupal_settings:
adobe_pass = drupal_settings.get('adobePass', {})
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'usa'),
title, video_id, _x('episode-rating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
info = self._search_json_ld(webpage, video_id, default={})
info.update({
'_type': 'url_transparent',
'url': smuggle_url(update_url_query(
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
query), {'force_smil_url': True}),
'id': video_id,
'title': title,
'series': _x('show-title', None),
'episode': title,
'ie_key': 'ThePlatform',
})
return info

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import base64
import hashlib import hashlib
import hmac import hmac
import itertools import itertools
@ -9,6 +10,10 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -165,19 +170,20 @@ class VikiIE(VikiBaseIE):
}, { }, {
# episode # episode
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1', 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
'md5': '5fa476a902e902783ac7a4d615cdbc7a', 'md5': '94e0e34fd58f169f40c184f232356cfe',
'info_dict': { 'info_dict': {
'id': '44699v', 'id': '44699v',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Boys Over Flowers - Episode 1', 'title': 'Boys Over Flowers - Episode 1',
'description': 'md5:b89cf50038b480b88b5b3c93589a9076', 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
'duration': 4204, 'duration': 4172,
'timestamp': 1270496524, 'timestamp': 1270496524,
'upload_date': '20100405', 'upload_date': '20100405',
'uploader': 'group8', 'uploader': 'group8',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
}, { }, {
# youtube external # youtube external
'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1', 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@ -194,14 +200,15 @@ class VikiIE(VikiBaseIE):
'uploader_id': 'ad14065n', 'uploader_id': 'ad14065n',
'like_count': int, 'like_count': int,
'age_limit': 13, 'age_limit': 13,
} },
'skip': 'Page not found!',
}, { }, {
'url': 'http://www.viki.com/player/44699v', 'url': 'http://www.viki.com/player/44699v',
'only_matching': True, 'only_matching': True,
}, { }, {
# non-English description # non-English description
'url': 'http://www.viki.com/videos/158036v-love-in-magic', 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
'md5': '1713ae35df5a521b31f6dc40730e7c9c', 'md5': 'adf9e321a0ae5d0aace349efaaff7691',
'info_dict': { 'info_dict': {
'id': '158036v', 'id': '158036v',
'ext': 'mp4', 'ext': 'mp4',
@ -217,8 +224,11 @@ class VikiIE(VikiBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video = self._call_api( resp = self._download_json(
'videos/%s.json' % video_id, video_id, 'Downloading video JSON') 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON',
headers={'x-viki-app-ver': '4.0.57'})
video = resp['video']
self._check_errors(video) self._check_errors(video)
@ -265,57 +275,74 @@ class VikiIE(VikiBaseIE):
'subtitles': subtitles, 'subtitles': subtitles,
} }
streams = self._call_api(
'videos/%s/streams.json' % video_id, video_id,
'Downloading video streams JSON')
if 'external' in streams:
result.update({
'_type': 'url_transparent',
'url': streams['external']['url'],
})
return result
formats = [] formats = []
for format_id, stream_dict in streams.items():
height = int_or_none(self._search_regex( def add_format(format_id, format_dict, protocol='http'):
r'^(\d+)[pP]$', format_id, 'height', default=None)) # rtmps URLs does not seem to work
for protocol, format_dict in stream_dict.items(): if protocol == 'rtmps':
# rtmps URLs does not seem to work return
if protocol == 'rtmps': format_url = format_dict.get('url')
continue if not format_url:
format_url = format_dict['url'] return
if format_id == 'm3u8': qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
m3u8_formats = self._extract_m3u8_formats( stream = qs.get('stream', [None])[0]
format_url, video_id, 'mp4', if stream:
entry_protocol='m3u8_native', format_url = base64.b64decode(stream).decode()
m3u8_id='m3u8-%s' % protocol, fatal=False) if format_id in ('m3u8', 'hls'):
# Despite CODECS metadata in m3u8 all video-only formats m3u8_formats = self._extract_m3u8_formats(
# are actually video+audio format_url, video_id, 'mp4',
for f in m3u8_formats: entry_protocol='m3u8_native',
if f.get('acodec') == 'none' and f.get('vcodec') != 'none': m3u8_id='m3u8-%s' % protocol, fatal=False)
f['acodec'] = None # Despite CODECS metadata in m3u8 all video-only formats
formats.extend(m3u8_formats) # are actually video+audio
elif format_url.startswith('rtmp'): for f in m3u8_formats:
mobj = re.search( if '_drm/index_' in f['url']:
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
continue continue
formats.append({ if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
'format_id': 'rtmp-%s' % format_id, f['acodec'] = None
'ext': 'flv', formats.append(f)
'url': mobj.group('url'), elif format_id in ('mpd', 'dash'):
'play_path': mobj.group('playpath'), formats.extend(self._extract_mpd_formats(
'app': mobj.group('app'), format_url, video_id, 'mpd-%s' % protocol, fatal=False))
'page_url': url, elif format_url.startswith('rtmp'):
}) mobj = re.search(
else: r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
formats.append({ format_url)
'url': format_url, if not mobj:
'format_id': '%s-%s' % (format_id, protocol), return
'height': height, formats.append({
}) 'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
})
else:
formats.append({
'url': format_url,
'format_id': '%s-%s' % (format_id, protocol),
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)),
})
for format_id, format_dict in (resp.get('streams') or {}).items():
add_format(format_id, format_dict)
if not formats:
streams = self._call_api(
'videos/%s/streams.json' % video_id, video_id,
'Downloading video streams JSON')
if 'external' in streams:
result.update({
'_type': 'url_transparent',
'url': streams['external']['url'],
})
return result
for format_id, stream_dict in streams.items():
for protocol, format_dict in stream_dict.items():
add_format(format_id, format_dict, protocol)
self._sort_formats(formats) self._sort_formats(formats)
result['formats'] = formats result['formats'] = formats

View File

@ -922,7 +922,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
}] }]
_PAGE_SIZE = 100 _PAGE_SIZE = 100
def _fetch_page(self, album_id, authorizaion, hashed_pass, page): def _fetch_page(self, album_id, authorization, hashed_pass, page):
api_page = page + 1 api_page = page + 1
query = { query = {
'fields': 'link,uri', 'fields': 'link,uri',
@ -934,7 +934,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
videos = self._download_json( videos = self._download_json(
'https://api.vimeo.com/albums/%s/videos' % album_id, 'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={ album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorizaion, 'Authorization': 'jwt ' + authorization,
})['data'] })['data']
for video in videos: for video in videos:
link = video.get('link') link = video.get('link')
@ -946,10 +946,13 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
album_id = self._match_id(url) album_id = self._match_id(url)
webpage = self._download_webpage(url, album_id) viewer = self._download_json(
viewer = self._parse_json(self._search_regex( 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
r'bootstrap_data\s*=\s*({.+?})</script>', if not viewer:
webpage, 'bootstrap data'), album_id)['viewer'] webpage = self._download_webpage(url, album_id)
viewer = self._parse_json(self._search_regex(
r'bootstrap_data\s*=\s*({.+?})</script>',
webpage, 'bootstrap data'), album_id)['viewer']
jwt = viewer['jwt'] jwt = viewer['jwt']
album = self._download_json( album = self._download_json(
'https://api.vimeo.com/albums/' + album_id, 'https://api.vimeo.com/albums/' + album_id,

View File

@ -1,25 +1,30 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import time
import itertools import itertools
import json
from .common import InfoExtractor
from .naver import NaverBaseIE from .naver import NaverBaseIE
from ..compat import compat_str from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
merge_dicts, merge_dicts,
remove_start,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
class VLiveIE(NaverBaseIE): class VLiveBaseIE(NaverBaseIE):
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
class VLiveIE(VLiveBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vlive.tv/video/1326', 'url': 'http://www.vlive.tv/video/1326',
@ -27,7 +32,7 @@ class VLiveIE(NaverBaseIE):
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
'ext': 'mp4', 'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast", 'title': "Girl's Day's Broadcast",
'creator': "Girl's Day", 'creator': "Girl's Day",
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a', 'uploader_id': 'muploader_a',
@ -37,7 +42,7 @@ class VLiveIE(NaverBaseIE):
'info_dict': { 'info_dict': {
'id': '16937', 'id': '16937',
'ext': 'mp4', 'ext': 'mp4',
'title': '[V LIVE] 첸백시 걍방', 'title': '첸백시 걍방',
'creator': 'EXO', 'creator': 'EXO',
'view_count': int, 'view_count': int,
'subtitles': 'mincount:12', 'subtitles': 'mincount:12',
@ -58,12 +63,11 @@ class VLiveIE(NaverBaseIE):
'subtitles': 'mincount:10', 'subtitles': 'mincount:10',
}, },
'skip': 'This video is only available for CH+ subscribers', 'skip': 'This video is only available for CH+ subscribers',
}, {
'url': 'https://www.vlive.tv/embed/1326',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -95,173 +99,122 @@ class VLiveIE(NaverBaseIE):
if not is_logged_in(): if not is_logged_in():
raise ExtractorError('Unable to log in', expected=True) raise ExtractorError('Unable to log in', expected=True)
def _call_api(self, path_template, video_id, fields=None):
    # Query always carries the app id; 'fields' is only sent when given
    params = {'appId': self._APP_ID}
    if fields:
        params['fields'] = fields

    # path_template contains a single %s placeholder for the video id
    api_path = path_template % video_id
    # Progress note label: last path component, cut at its first '-'
    resource = path_template.split('/')[-1].split('-')[0]

    return self._download_json(
        'https://www.vlive.tv/globalv-web/vam-web/' + api_path,
        video_id,
        'Downloading %s JSON metadata' % resource,
        headers={'Referer': 'https://www.vlive.tv/'},
        query=params)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( try:
'https://www.vlive.tv/video/%s' % video_id, video_id) post = self._call_api(
'post/v1.0/officialVideoPost-%s', video_id,
'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
raise
VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)' video = post['officialVideo']
VIDEO_PARAMS_FIELD = 'video params'
params = self._parse_json(self._search_regex( def get_common_fields():
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id, channel = post.get('channel') or {}
transform_source=lambda s: '[' + s + ']', fatal=False) return {
'title': video.get('title'),
'creator': post.get('author', {}).get('nickname'),
'channel': channel.get('channelName'),
'channel_id': channel.get('channelCode'),
'duration': int_or_none(video.get('playTime')),
'view_count': int_or_none(video.get('playCount')),
'like_count': int_or_none(video.get('likeCount')),
'comment_count': int_or_none(video.get('commentCount')),
}
if not params or len(params) < 7: video_type = video.get('type')
params = self._search_regex( if video_type == 'VOD':
VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD) inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)] vod_id = video['vodId']
return merge_dicts(
status, long_video_id, key = params[2], params[5], params[6] get_common_fields(),
status = remove_start(status, 'PRODUCT_') self._extract_video_info(video_id, vod_id, inkey))
elif video_type == 'LIVE':
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'): status = video.get('status')
return self._live(video_id, webpage) if status == 'ON_AIR':
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'): stream_url = self._call_api(
return self._replay(video_id, webpage, long_video_id, key) 'old/v3/live/%s/playInfo',
video_id)['result']['adaptiveStreamUrl']
if status == 'LIVE_END': formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
raise ExtractorError('Uploading for replay. Please wait...', info = get_common_fields()
expected=True) info.update({
elif status == 'COMING_SOON': 'title': self._live_title(video['title']),
raise ExtractorError('Coming soon!', expected=True) 'id': video_id,
elif status == 'CANCELED': 'formats': formats,
raise ExtractorError('We are sorry, ' 'is_live': True,
'but the live broadcast has been canceled.', })
expected=True) return info
elif status == 'ONLY_APP': elif status == 'ENDED':
raise ExtractorError('Unsupported video type', expected=True) raise ExtractorError(
else: 'Uploading for replay. Please wait...', expected=True)
raise ExtractorError('Unknown status %s' % status) elif status == 'RESERVED':
raise ExtractorError('Coming soon!', expected=True)
def _get_common_fields(self, webpage): elif video.get('exposeStatus') == 'CANCEL':
title = self._og_search_title(webpage) raise ExtractorError(
creator = self._html_search_regex( 'We are sorry, but the live broadcast has been canceled.',
r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)', expected=True)
webpage, 'creator', fatal=False) else:
thumbnail = self._og_search_thumbnail(webpage) raise ExtractorError('Unknown status ' + status)
return {
'title': title,
'creator': creator,
'thumbnail': thumbnail,
}
def _live(self, video_id, webpage):
    init_page = self._download_init_page(video_id)

    # The regex captures a quoted JSON string literal; the result of
    # decoding it is itself parsed as JSON, hence the two parse calls.
    encoded_info = self._search_regex(
        r'"liveStreamInfo"\s*:\s*(".*"),',
        init_page, 'live stream info')
    stream_info = self._parse_json(
        self._parse_json(encoded_info, video_id), video_id)

    # One m3u8 manifest per advertised resolution; failures are non-fatal
    formats = []
    for resolution in stream_info.get('resolutions', []):
        hls_formats = self._extract_m3u8_formats(
            resolution['cdnUrl'], video_id, 'mp4',
            m3u8_id=resolution.get('name'),
            fatal=False, live=True)
        formats.extend(hls_formats)
    self._sort_formats(formats)

    # Start from the page-level fields and overlay the live-specific ones
    info = self._get_common_fields(webpage)
    info['title'] = self._live_title(info['title'])
    info['id'] = video_id
    info['formats'] = formats
    info['is_live'] = True
    return info
def _replay(self, video_id, webpage, long_video_id, key):
    # When either identifier is an empty string, read both from the
    # oVideoStatus object embedded in the init page instead.
    if long_video_id == '' or key == '':
        init_page = self._download_init_page(video_id)
        status_patterns = (
            r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
            r'(?s)oVideoStatus\s*=\s*({.+})')
        raw_status = self._search_regex(
            status_patterns, init_page, 'video info')
        video_info = self._parse_json(raw_status, video_id)
        if video_info.get('status') == 'NEED_CHANNEL_PLUS':
            self.raise_login_required(
                'This video is only available for CH+ subscribers')
        long_video_id = video_info['vid']
        key = video_info['inkey']

    # Page-level fields are merged with the video info
    common_fields = self._get_common_fields(webpage)
    video_fields = self._extract_video_info(video_id, long_video_id, key)
    return merge_dicts(common_fields, video_fields)
def _download_init_page(self, video_id):
    # POST the video sequence number as form data; the Referer points at
    # the public page of the same video.
    form_data = urlencode_postdata({'videoSeq': video_id})
    request_headers = {
        'Referer': 'https://www.vlive.tv/video/%s' % video_id,
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    return self._download_webpage(
        'https://www.vlive.tv/video/init/view',
        video_id,
        note='Downloading live webpage',
        data=form_data,
        headers=request_headers)
class VLiveChannelIE(InfoExtractor): class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel' IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)' _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
_TEST = { _TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B', 'url': 'http://channels.vlive.tv/FCD4B',
'info_dict': { 'info_dict': {
'id': 'FCD4B', 'id': 'FCD4B',
'title': 'MAMAMOO', 'title': 'MAMAMOO',
}, },
'playlist_mincount': 110 'playlist_mincount': 110
} }, {
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' 'url': 'https://www.vlive.tv/channel/FCD4B',
'only_matching': True,
}]
def _call_api(self, path, channel_key_suffix, channel_value, note, query):
    # Base parameters: app id plus a 'channel<suffix>' key (e.g. the
    # suffix 'Code' or 'Seq') carrying the channel value.
    params = {
        'app_id': self._APP_ID,
        'channel' + channel_key_suffix: channel_value,
    }
    # Caller-supplied parameters are applied last and win on key clashes
    params.update(query)

    response = self._download_json(
        'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
        channel_value, note='Downloading ' + note, query=params)
    # Only the 'result' member of the response is returned
    return response['result']
def _real_extract(self, url): def _real_extract(self, url):
channel_code = self._match_id(url) channel_code = self._match_id(url)
webpage = self._download_webpage( channel_seq = self._call_api(
'http://channels.vlive.tv/%s/video' % channel_code, channel_code) 'decodeChannelCode', 'Code', channel_code,
'decode channel code', {})['channelSeq']
app_id = None
app_js_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
webpage, 'app js', default=None, group='url')
if app_js_url:
app_js = self._download_webpage(
app_js_url, channel_code, 'Downloading app JS', fatal=False)
if app_js:
app_id = self._search_regex(
r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
app_js, 'app id', default=None)
app_id = app_id or self._APP_ID
channel_info = self._download_json(
'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
channel_code, note='Downloading decode channel code',
query={
'app_id': app_id,
'channelCode': channel_code,
'_': int(time.time())
})
channel_seq = channel_info['result']['channelSeq']
channel_name = None channel_name = None
entries = [] entries = []
for page_num in itertools.count(1): for page_num in itertools.count(1):
video_list = self._download_json( video_list = self._call_api(
'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', 'getChannelVideoList', 'Seq', channel_seq,
channel_code, note='Downloading channel list page #%d' % page_num, 'channel list page #%d' % page_num, {
query={
'app_id': app_id,
'channelSeq': channel_seq,
# Large values of maxNumOfRows (~300 or above) may cause # Large values of maxNumOfRows (~300 or above) may cause
# empty responses (see [1]), e.g. this happens for [2] that # empty responses (see [1]), e.g. this happens for [2] that
# has more than 300 videos. # has more than 300 videos.
# 1. https://github.com/ytdl-org/youtube-dl/issues/13830 # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
# 2. http://channels.vlive.tv/EDBF. # 2. http://channels.vlive.tv/EDBF.
'maxNumOfRows': 100, 'maxNumOfRows': 100,
'_': int(time.time()),
'pageNo': page_num 'pageNo': page_num
} }
) )
@ -269,11 +222,11 @@ class VLiveChannelIE(InfoExtractor):
if not channel_name: if not channel_name:
channel_name = try_get( channel_name = try_get(
video_list, video_list,
lambda x: x['result']['channelInfo']['channelName'], lambda x: x['channelInfo']['channelName'],
compat_str) compat_str)
videos = try_get( videos = try_get(
video_list, lambda x: x['result']['videoList'], list) video_list, lambda x: x['videoList'], list)
if not videos: if not videos:
break break
@ -289,79 +242,3 @@ class VLiveChannelIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
entries, channel_code, channel_name) entries, channel_code, channel_name)
class VLivePlaylistIE(InfoExtractor):
    """Extractor for ``vlive.tv/video/<video_id>/playlist/<id>`` URLs.

    Yields a playlist of ``VLiveIE`` url results, or a single ``VLiveIE``
    url result when ``noplaylist`` is set or the page exposes no
    ``playlistVideoSeqs`` array.
    """
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
    _TESTS = [{
        # regular working playlist
        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
        'info_dict': {
            'id': '117963',
            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
        },
        'playlist_mincount': 10
    }, {
        # playlist with no playlistVideoSeqs
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22867',
            'ext': 'mp4',
            'title': '[V LIVE] Valentine Day Message from MINA',
            'creator': 'TWICE',
            'view_count': int
        },
        'params': {
            'skip_download': True,
        }
    }]

    def _build_video_result(self, video_id, message):
        """Print ``message`` and return a url result for the single video."""
        self.to_screen(message)
        single_video_url = self._VIDEO_URL_TEMPLATE % video_id
        return self.url_result(
            single_video_url, ie=VLiveIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return the playlist result, or a single-video result as fallback."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('video_id')
        playlist_id = match.group('id')

        if self._downloader.params.get('noplaylist'):
            return self._build_video_result(
                video_id,
                'Downloading just video %s because of --no-playlist'
                % video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        playlist_url = (
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id))
        webpage = self._download_webpage(playlist_url, playlist_id)

        # The page is expected to embed the member ids as a JS array literal.
        raw_item_ids = self._search_regex(
            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
            'playlist video seqs', default=None, fatal=False)

        if not raw_item_ids:
            return self._build_video_result(
                video_id,
                'Downloading just video %s because no playlist was found'
                % video_id)

        entries = []
        for item_id in self._parse_json(raw_item_ids, playlist_id):
            entries.append(self.url_result(
                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id)))

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)

View File

@ -54,17 +54,17 @@ class XiamiBaseIE(InfoExtractor):
def _decrypt(origin): def _decrypt(origin):
n = int(origin[0]) n = int(origin[0])
origin = origin[1:] origin = origin[1:]
short_lenth = len(origin) // n short_length = len(origin) // n
long_num = len(origin) - short_lenth * n long_num = len(origin) - short_length * n
l = tuple() l = tuple()
for i in range(0, n): for i in range(0, n):
length = short_lenth length = short_length
if i < long_num: if i < long_num:
length += 1 length += 1
l += (origin[0:length], ) l += (origin[0:length], )
origin = origin[length:] origin = origin[length:]
ans = '' ans = ''
for i in range(0, short_lenth + 1): for i in range(0, short_length + 1):
for j in range(0, n): for j in range(0, n):
if len(l[j]) > i: if len(l[j]) > i:
ans += l[j][i] ans += l[j][i]

View File

@ -90,7 +90,7 @@ class XTubeIE(InfoExtractor):
title, thumbnail, duration = [None] * 3 title, thumbnail, duration = [None] * 3
config = self._parse_json(self._search_regex( config = self._parse_json(self._search_regex(
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config', r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
default='{}'), video_id, transform_source=js_to_json, fatal=False) default='{}'), video_id, transform_source=js_to_json, fatal=False)
if config: if config:
config = config.get('mainRoll') config = config.get('mainRoll')

View File

@ -29,7 +29,6 @@ class YouPornIE(InfoExtractor):
'upload_date': '20101217', 'upload_date': '20101217',
'average_rating': int, 'average_rating': int,
'view_count': int, 'view_count': int,
'comment_count': int,
'categories': list, 'categories': list,
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
@ -48,7 +47,6 @@ class YouPornIE(InfoExtractor):
'upload_date': '20110418', 'upload_date': '20110418',
'average_rating': int, 'average_rating': int,
'view_count': int, 'view_count': int,
'comment_count': int,
'categories': list, 'categories': list,
'tags': list, 'tags': list,
'age_limit': 18, 'age_limit': 18,
@ -156,7 +154,8 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex( upload_date = unified_strdate(self._html_search_regex(
[r'Date\s+[Aa]dded:\s*<span>([^<]+)', [r'UPLOADED:\s*<span>([^<]+)',
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'], r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
webpage, 'upload date', fatal=False)) webpage, 'upload date', fatal=False))
@ -171,7 +170,7 @@ class YouPornIE(InfoExtractor):
webpage, 'view count', fatal=False, group='count')) webpage, 'view count', fatal=False, group='count'))
comment_count = str_to_int(self._search_regex( comment_count = str_to_int(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)', r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', fatal=False)) webpage, 'comment count', default=None))
def extract_tag_box(regex, title): def extract_tag_box(regex, title):
tag_box = self._search_regex(regex, webpage, title, default=None) tag_box = self._search_regex(regex, webpage, title, default=None)

File diff suppressed because it is too large Load Diff

View File

@ -2458,7 +2458,7 @@ class XAttrMetadataError(YoutubeDLError):
# Parsing code and msg # Parsing code and msg
if (self.code in (errno.ENOSPC, errno.EDQUOT) if (self.code in (errno.ENOSPC, errno.EDQUOT)
or 'No space left' in self.msg or 'Disk quota excedded' in self.msg): or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
self.reason = 'NO_SPACE' self.reason = 'NO_SPACE'
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
self.reason = 'VALUE_TOO_LONG' self.reason = 'VALUE_TOO_LONG'
@ -4078,7 +4078,7 @@ def js_to_json(code):
v = m.group(0) v = m.group(0)
if v in ('true', 'false', 'null'): if v in ('true', 'false', 'null'):
return v return v
elif v.startswith('/*') or v.startswith('//') or v == ',': elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
return "" return ""
if v[0] in ("'", '"'): if v[0] in ("'", '"'):
@ -4103,7 +4103,8 @@ def js_to_json(code):
{comment}|,(?={skip}[\]}}])| {comment}|,(?={skip}[\]}}])|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*| (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?| \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
[0-9]+(?={skip}:) [0-9]+(?={skip}:)|
!+
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code) '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
@ -4206,10 +4207,10 @@ def parse_codecs(codecs_str):
# http://tools.ietf.org/html/rfc6381 # http://tools.ietf.org/html/rfc6381
if not codecs_str: if not codecs_str:
return {} return {}
splited_codecs = list(filter(None, map( split_codecs = list(filter(None, map(
lambda str: str.strip(), codecs_str.strip().strip(',').split(',')))) lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
vcodec, acodec = None, None vcodec, acodec = None, None
for full_codec in splited_codecs: for full_codec in split_codecs:
codec = full_codec.split('.')[0] codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
if not vcodec: if not vcodec:
@ -4220,10 +4221,10 @@ def parse_codecs(codecs_str):
else: else:
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
if not vcodec and not acodec: if not vcodec and not acodec:
if len(splited_codecs) == 2: if len(split_codecs) == 2:
return { return {
'vcodec': splited_codecs[0], 'vcodec': split_codecs[0],
'acodec': splited_codecs[1], 'acodec': split_codecs[1],
} }
else: else:
return { return {
@ -5462,7 +5463,7 @@ def encode_base_n(num, n, table=None):
def decode_packed_codes(code): def decode_packed_codes(code):
mobj = re.search(PACKED_CODES_RE, code) mobj = re.search(PACKED_CODES_RE, code)
obfucasted_code, base, count, symbols = mobj.groups() obfuscated_code, base, count, symbols = mobj.groups()
base = int(base) base = int(base)
count = int(count) count = int(count)
symbols = symbols.split('|') symbols = symbols.split('|')
@ -5475,7 +5476,7 @@ def decode_packed_codes(code):
return re.sub( return re.sub(
r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
obfucasted_code) obfuscated_code)
def caesar(s, alphabet, shift): def caesar(s, alphabet, shift):

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2020.09.20' __version__ = '2020.11.21.1'