mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-14 20:28:36 +09:00
Compare commits
88 Commits
2021.01.24
...
d81421af4b
Author | SHA1 | Date | |
---|---|---|---|
![]() |
d81421af4b | ||
![]() |
7422a2194f | ||
![]() |
2090dbdc8c | ||
![]() |
0a04e03a02 | ||
![]() |
44b2d5f5fc | ||
![]() |
aa9118a373 | ||
![]() |
36abc16c3c | ||
![]() |
919d764600 | ||
![]() |
696183e133 | ||
![]() |
f90d825a6b | ||
![]() |
3037ab00c7 | ||
![]() |
21e872b19a | ||
![]() |
cf2dbec630 | ||
![]() |
b92bb0e02a | ||
![]() |
40edffae3d | ||
![]() |
9fc5eafb8e | ||
![]() |
08c2fbb844 | ||
![]() |
3997efb65e | ||
![]() |
a7356dffe9 | ||
![]() |
e20ec43094 | ||
![]() |
70baa7bfae | ||
![]() |
8980f53b42 | ||
![]() |
a363fb5d28 | ||
![]() |
646052e416 | ||
![]() |
844e4cbc54 | ||
![]() |
56c63c8c02 | ||
![]() |
07eb8f1916 | ||
![]() |
4b5410c5c8 | ||
![]() |
be2e9b76ee | ||
![]() |
d8085580f6 | ||
![]() |
6d32c6c6d3 | ||
![]() |
f94d764993 | ||
![]() |
f28f1b4d6e | ||
![]() |
360d5f0daa | ||
![]() |
cd493c5adc | ||
![]() |
a4c7ed6b1e | ||
![]() |
7f8b8bc418 | ||
![]() |
311ebdd9a5 | ||
![]() |
99c68db0a8 | ||
![]() |
5fc53690cb | ||
![]() |
7a9161578e | ||
![]() |
2405854705 | ||
![]() |
0cf09c2b41 | ||
![]() |
0156ce95c5 | ||
![]() |
1641b13232 | ||
![]() |
a4bdc3112b | ||
![]() |
c7d407bca2 | ||
![]() |
7215691ab7 | ||
![]() |
fc88e8f0e3 | ||
![]() |
cfefb7d854 | ||
![]() |
3c07d007ca | ||
![]() |
89c5a7d5aa | ||
![]() |
2adc0c51cd | ||
![]() |
1f0910bc27 | ||
![]() |
e22ff4e356 | ||
![]() |
83031d749b | ||
![]() |
1b731ebcaa | ||
![]() |
ab25f3f431 | ||
![]() |
07f7aad81c | ||
![]() |
1e2575df87 | ||
![]() |
b111a64135 | ||
![]() |
0e3a968479 | ||
![]() |
c11f7cf9bd | ||
![]() |
8fa7cc387d | ||
![]() |
65eee5a745 | ||
![]() |
efef4ddf51 | ||
![]() |
159a3d48df | ||
![]() |
b46483a6ec | ||
![]() |
9c724601ba | ||
![]() |
67299f23d8 | ||
![]() |
8bf9591a70 | ||
![]() |
a800838f5a | ||
![]() |
ba15b2fee6 | ||
![]() |
56a7ee9033 | ||
![]() |
0b4f03a563 | ||
![]() |
7b8fa658f8 | ||
![]() |
fd95fc33b1 | ||
![]() |
c669554ef5 | ||
![]() |
11b68df7a4 | ||
![]() |
d18f4419a7 | ||
![]() |
0f7d413d5b | ||
![]() |
286e5d6724 | ||
![]() |
395981288b | ||
![]() |
55bb3556c8 | ||
![]() |
57f2488bbe | ||
![]() |
ea399a53eb | ||
![]() |
811a183eb6 | ||
![]() |
b63981e850 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.01.24
|
||||
[debug] youtube-dl version 2021.02.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2021.01.24
|
||||
[debug] youtube-dl version 2021.02.22
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2021.02.22. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.01.24**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2021.02.22**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
1
AUTHORS
1
AUTHORS
@@ -246,3 +246,4 @@ Enes Solak
|
||||
Nathan Rossi
|
||||
Thomas van der Berg
|
||||
Luca Cherubin
|
||||
Adrian Heine
|
105
ChangeLog
105
ChangeLog
@@ -1,4 +1,107 @@
|
||||
version 2021.01.24
|
||||
version 2021.02.22
|
||||
|
||||
Core
|
||||
+ [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase
|
||||
(#28112)
|
||||
|
||||
Extractors
|
||||
* [apa] Fix and improve extraction (#27750)
|
||||
+ [youporn] Extract duration (#28019)
|
||||
+ [peertube] Add support for canard.tube (#28190)
|
||||
* [youtube] Fixup m4a_dash formats (#28165)
|
||||
+ [samplefocus] Add support for samplefocus.com (#27763)
|
||||
+ [vimeo] Add support for unlisted video source format extraction
|
||||
* [viki] Improve extraction (#26522, #28203)
|
||||
* Extract uploader URL and episode number
|
||||
* Report login required error
|
||||
+ Extract 480p formats
|
||||
* Fix API v4 calls
|
||||
* [ninegag] Unescape title (#28201)
|
||||
* [youtube] Improve URL regular expression (#28193)
|
||||
+ [youtube] Add support for redirect.invidious.io (#28193)
|
||||
+ [dplay] Add support for de.hgtv.com (#28182)
|
||||
+ [dplay] Add support for discoveryplus.com (#24698)
|
||||
+ [simplecast] Add support for simplecast.com (#24107)
|
||||
* [youtube] Fix uploader extraction in flat playlist mode (#28045)
|
||||
* [yandexmusic:playlist] Request missing tracks in chunks (#27355, #28184)
|
||||
+ [storyfire] Add support for storyfire.com (#25628, #26349)
|
||||
+ [zhihu] Add support for zhihu.com (#28177)
|
||||
* [youtube] Fix controversial videos when authenticated with cookies (#28174)
|
||||
* [ccma] Fix timestamp parsing in python 2
|
||||
+ [videopress] Add support for video.wordpress.com
|
||||
* [kakao] Improve info extraction and detect geo restriction (#26577)
|
||||
* [xboxclips] Fix extraction (#27151)
|
||||
* [ard] Improve formats extraction (#28155)
|
||||
+ [canvas] Add support for dagelijksekost.een.be (#28119)
|
||||
|
||||
|
||||
version 2021.02.10
|
||||
|
||||
Extractors
|
||||
* [youtube:tab] Improve grid continuation extraction (#28130)
|
||||
* [ign] Fix extraction (#24771)
|
||||
+ [xhamster] Extract format filesize
|
||||
+ [xhamster] Extract formats from xplayer settings (#28114)
|
||||
+ [youtube] Add support phone/tablet JS player (#26424)
|
||||
* [archiveorg] Fix and improve extraction (#21330, #23586, #25277, #26780,
|
||||
#27109, #27236, #28063)
|
||||
+ [cda] Detect geo restricted videos (#28106)
|
||||
* [urplay] Fix extraction (#28073, #28074)
|
||||
* [youtube] Fix release date extraction (#28094)
|
||||
+ [youtube] Extract abr and vbr (#28100)
|
||||
* [youtube] Skip OTF formats (#28070)
|
||||
|
||||
|
||||
version 2021.02.04.1
|
||||
|
||||
Extractors
|
||||
* [youtube] Prefer DASH formats (#28070)
|
||||
* [azmedien] Fix extraction (#28064)
|
||||
|
||||
|
||||
version 2021.02.04
|
||||
|
||||
Extractors
|
||||
* [pornhub] Implement lazy playlist extraction
|
||||
* [svtplay] Fix video id extraction (#28058)
|
||||
+ [pornhub] Add support for authentication (#18797, #21416, #24294)
|
||||
* [pornhub:user] Improve paging
|
||||
+ [pornhub:user] Add support for URLs unavailable via /videos page (#27853)
|
||||
+ [bravotv] Add support for oxygen.com (#13357, #22500)
|
||||
+ [youtube] Pass embed URL to get_video_info request
|
||||
* [ccma] Improve metadata extraction (#27994)
|
||||
+ Extract age limit, alt title, categories, series and episode number
|
||||
* Fix timestamp multiple subtitles extraction
|
||||
* [egghead] Update API domain (#28038)
|
||||
- [vidzi] Remove extractor (#12629)
|
||||
* [vidio] Improve metadata extraction
|
||||
* [youtube] Improve subtitles extraction
|
||||
* [youtube] Fix chapter extraction fallback
|
||||
* [youtube] Rewrite extractor
|
||||
* Improve format sorting
|
||||
* Remove unused code
|
||||
* Fix series metadata extraction
|
||||
* Fix trailer video extraction
|
||||
* Improve error reporting
|
||||
+ Extract video location
|
||||
+ [vvvvid] Add support for youtube embeds (#27825)
|
||||
* [googledrive] Report download page errors (#28005)
|
||||
* [vlive] Fix error message decoding for python 2 (#28004)
|
||||
* [youtube] Improve DASH formats file size extraction
|
||||
* [cda] Improve birth validation detection (#14022, #27929)
|
||||
+ [awaan] Extract uploader id (#27963)
|
||||
+ [medialaan] Add support DPG Media MyChannels based websites (#14871, #15597,
|
||||
#16106, #16489)
|
||||
* [abcnews] Fix extraction (#12394, #27920)
|
||||
* [AMP] Fix upload date and timestamp extraction (#27970)
|
||||
* [tv4] Relax URL regular expression (#27964)
|
||||
+ [tv2] Add support for mtvuutiset.fi (#27744)
|
||||
* [adn] Improve login warning reporting
|
||||
* [zype] Fix uplynk id extraction (#27956)
|
||||
+ [adn] Add support for authentication (#17091, #27841, #27937)
|
||||
|
||||
|
||||
version 2021.01.24.1
|
||||
|
||||
Core
|
||||
* Introduce --output-na-placeholder (#27896)
|
||||
|
@@ -1,6 +1,5 @@
|
||||
# Supported sites
|
||||
- **1tv**: Первый канал
|
||||
- **1up.com**
|
||||
- **20min**
|
||||
- **220.ro**
|
||||
- **23video**
|
||||
@@ -213,6 +212,7 @@
|
||||
- **curiositystream**
|
||||
- **curiositystream:collection**
|
||||
- **CWTV**
|
||||
- **DagelijkseKost**: dagelijksekost.een.be
|
||||
- **DailyMail**
|
||||
- **dailymotion**
|
||||
- **dailymotion:playlist**
|
||||
@@ -234,6 +234,7 @@
|
||||
- **DiscoveryGo**
|
||||
- **DiscoveryGoPlaylist**
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryPlus**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **dlive:stream**
|
||||
@@ -354,6 +355,7 @@
|
||||
- **HentaiStigma**
|
||||
- **hetklokhuis**
|
||||
- **hgtv.com:show**
|
||||
- **HGTVDe**
|
||||
- **HiDive**
|
||||
- **HistoricFilms**
|
||||
- **history:player**
|
||||
@@ -376,6 +378,8 @@
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **ign.com**
|
||||
- **IGNArticle**
|
||||
- **IGNVideo**
|
||||
- **IHeartRadio**
|
||||
- **iheartradio:podcast**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
@@ -537,6 +541,7 @@
|
||||
- **mtv:video**
|
||||
- **mtvjapan**
|
||||
- **mtvservices:embedded**
|
||||
- **MTVUutisetArticle**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **mva**: Microsoft Virtual Academy videos
|
||||
- **mva:course**: Microsoft Virtual Academy courses
|
||||
@@ -675,7 +680,6 @@
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
- **pcmag**
|
||||
- **PearVideo**
|
||||
- **PeerTube**
|
||||
- **People**
|
||||
@@ -802,6 +806,7 @@
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **SAKTV**
|
||||
- **SaltTV**
|
||||
- **SampleFocus**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@@ -824,6 +829,9 @@
|
||||
- **ShahidShow**
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **simplecast**
|
||||
- **simplecast:episode**
|
||||
- **simplecast:podcast**
|
||||
- **Sina**
|
||||
- **sky.it**
|
||||
- **sky:news**
|
||||
@@ -876,6 +884,9 @@
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **StitcherShow**
|
||||
- **StoryFire**
|
||||
- **StoryFireSeries**
|
||||
- **StoryFireUser**
|
||||
- **Streamable**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
@@ -1058,7 +1069,6 @@
|
||||
- **vidme**
|
||||
- **vidme:user**
|
||||
- **vidme:user:likes**
|
||||
- **Vidzi**
|
||||
- **vier**: vier.be and vijf.be
|
||||
- **vier:videos**
|
||||
- **viewlift**
|
||||
@@ -1103,6 +1113,7 @@
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
- **VShare**
|
||||
- **VTM**
|
||||
- **VTXTV**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
@@ -1197,5 +1208,6 @@
|
||||
- **ZattooLive**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **Zhihu**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
@@ -1,275 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import expect_value
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
|
||||
|
||||
class TestYoutubeChapters(unittest.TestCase):
|
||||
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://www.youtube.com/watch?v=A22oy8dFjqc
|
||||
# pattern: 00:00 - <title>
|
||||
'''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***<br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+36);return false;">00:36</a> - Bohemian Rhapsody<br /><a href="#" onclick="yt.www.watch.player.seekTo(02*60+42);return false;">02:42</a> - Radio Ga Ga<br /><a href="#" onclick="yt.www.watch.player.seekTo(06*60+53);return false;">06:53</a> - Ay Oh!<br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+34);return false;">07:34</a> - Hammer To Fall<br /><a href="#" onclick="yt.www.watch.player.seekTo(12*60+08);return false;">12:08</a> - Crazy Little Thing Called Love<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+03);return false;">16:03</a> - We Will Rock You<br /><a href="#" onclick="yt.www.watch.player.seekTo(17*60+18);return false;">17:18</a> - We Are The Champions<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+12);return false;">21:12</a> - Is This The World We Created...?<br /><br />Short song analysis:<br /><br />- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).<br /><br />- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!<br /><br />- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!<br /><br />- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!<br /><br />- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.<br /><br />- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!<br /><br />- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!<br /><br />- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... Which was being televised to almost 2 billion homes.<br /><br /><br />All rights go to their respective owners!<br />-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''',
|
||||
1477,
|
||||
[{
|
||||
'start_time': 36,
|
||||
'end_time': 162,
|
||||
'title': 'Bohemian Rhapsody',
|
||||
}, {
|
||||
'start_time': 162,
|
||||
'end_time': 413,
|
||||
'title': 'Radio Ga Ga',
|
||||
}, {
|
||||
'start_time': 413,
|
||||
'end_time': 454,
|
||||
'title': 'Ay Oh!',
|
||||
}, {
|
||||
'start_time': 454,
|
||||
'end_time': 728,
|
||||
'title': 'Hammer To Fall',
|
||||
}, {
|
||||
'start_time': 728,
|
||||
'end_time': 963,
|
||||
'title': 'Crazy Little Thing Called Love',
|
||||
}, {
|
||||
'start_time': 963,
|
||||
'end_time': 1038,
|
||||
'title': 'We Will Rock You',
|
||||
}, {
|
||||
'start_time': 1038,
|
||||
'end_time': 1272,
|
||||
'title': 'We Are The Champions',
|
||||
}, {
|
||||
'start_time': 1272,
|
||||
'end_time': 1477,
|
||||
'title': 'Is This The World We Created...?',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=ekYlRhALiRQ
|
||||
# pattern: <num>. <title> 0:00
|
||||
'1. Those Beaten Paths of Confusion <a href="#" onclick="yt.www.watch.player.seekTo(0*60+00);return false;">0:00</a><br />2. Beyond the Shadows of Emptiness & Nothingness <a href="#" onclick="yt.www.watch.player.seekTo(11*60+47);return false;">11:47</a><br />3. Poison Yourself...With Thought <a href="#" onclick="yt.www.watch.player.seekTo(26*60+30);return false;">26:30</a><br />4. The Agents of Transformation <a href="#" onclick="yt.www.watch.player.seekTo(35*60+57);return false;">35:57</a><br />5. Drowning in the Pain of Consciousness <a href="#" onclick="yt.www.watch.player.seekTo(44*60+32);return false;">44:32</a><br />6. Deny the Disease of Life <a href="#" onclick="yt.www.watch.player.seekTo(53*60+07);return false;">53:07</a><br /><br />More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity<br /><br />No copyright is intended. The rights to this video are assumed by the owner and its affiliates.',
|
||||
4009,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 707,
|
||||
'title': '1. Those Beaten Paths of Confusion',
|
||||
}, {
|
||||
'start_time': 707,
|
||||
'end_time': 1590,
|
||||
'title': '2. Beyond the Shadows of Emptiness & Nothingness',
|
||||
}, {
|
||||
'start_time': 1590,
|
||||
'end_time': 2157,
|
||||
'title': '3. Poison Yourself...With Thought',
|
||||
}, {
|
||||
'start_time': 2157,
|
||||
'end_time': 2672,
|
||||
'title': '4. The Agents of Transformation',
|
||||
}, {
|
||||
'start_time': 2672,
|
||||
'end_time': 3187,
|
||||
'title': '5. Drowning in the Pain of Consciousness',
|
||||
}, {
|
||||
'start_time': 3187,
|
||||
'end_time': 4009,
|
||||
'title': '6. Deny the Disease of Life',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=WjL4pSzog9w
|
||||
# pattern: 00:00 <title>
|
||||
'<a href="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" class="yt-uix-servicelink " data-target-new-window="True" data-servicelink="CDAQ6TgYACITCNf1raqT2dMCFdRjGAod_o0CBSj4HQ" data-url="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" rel="nofollow noopener" target="_blank">https://arizmenda.bandcamp.com/merch/...</a><br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> Christening Unborn Deformities <br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+08);return false;">07:08</a> Taste of Purity<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+16);return false;">16:16</a> Sculpting Sins of a Universal Tongue<br /><a href="#" onclick="yt.www.watch.player.seekTo(24*60+45);return false;">24:45</a> Birth<br /><a href="#" onclick="yt.www.watch.player.seekTo(31*60+24);return false;">31:24</a> Neves<br /><a href="#" onclick="yt.www.watch.player.seekTo(37*60+55);return false;">37:55</a> Libations in Limbo',
|
||||
2705,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 428,
|
||||
'title': 'Christening Unborn Deformities',
|
||||
}, {
|
||||
'start_time': 428,
|
||||
'end_time': 976,
|
||||
'title': 'Taste of Purity',
|
||||
}, {
|
||||
'start_time': 976,
|
||||
'end_time': 1485,
|
||||
'title': 'Sculpting Sins of a Universal Tongue',
|
||||
}, {
|
||||
'start_time': 1485,
|
||||
'end_time': 1884,
|
||||
'title': 'Birth',
|
||||
}, {
|
||||
'start_time': 1884,
|
||||
'end_time': 2275,
|
||||
'title': 'Neves',
|
||||
}, {
|
||||
'start_time': 2275,
|
||||
'end_time': 2705,
|
||||
'title': 'Libations in Limbo',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=o3r1sn-t3is
|
||||
# pattern: <title> 00:00 <note>
|
||||
'Download this show in MP3: <a href="http://sh.st/njZKK" class="yt-uix-servicelink " data-url="http://sh.st/njZKK" data-target-new-window="True" data-servicelink="CDAQ6TgYACITCK3j8_6o2dMCFVDCGAoduVAKKij4HQ" rel="nofollow noopener" target="_blank">http://sh.st/njZKK</a><br /><br />Setlist:<br />I-E-A-I-A-I-O <a href="#" onclick="yt.www.watch.player.seekTo(00*60+45);return false;">00:45</a><br />Suite-Pee <a href="#" onclick="yt.www.watch.player.seekTo(4*60+26);return false;">4:26</a> (Incomplete)<br />Attack <a href="#" onclick="yt.www.watch.player.seekTo(5*60+31);return false;">5:31</a> (First live performance since 2011)<br />Prison Song <a href="#" onclick="yt.www.watch.player.seekTo(8*60+42);return false;">8:42</a><br />Know <a href="#" onclick="yt.www.watch.player.seekTo(12*60+32);return false;">12:32</a> (First live performance since 2011)<br />Aerials <a href="#" onclick="yt.www.watch.player.seekTo(15*60+32);return false;">15:32</a><br />Soldier Side - Intro <a href="#" onclick="yt.www.watch.player.seekTo(19*60+13);return false;">19:13</a><br />B.Y.O.B. <a href="#" onclick="yt.www.watch.player.seekTo(20*60+09);return false;">20:09</a><br />Soil <a href="#" onclick="yt.www.watch.player.seekTo(24*60+32);return false;">24:32</a><br />Darts <a href="#" onclick="yt.www.watch.player.seekTo(27*60+48);return false;">27:48</a><br />Radio/Video <a href="#" onclick="yt.www.watch.player.seekTo(30*60+38);return false;">30:38</a><br />Hypnotize <a href="#" onclick="yt.www.watch.player.seekTo(35*60+05);return false;">35:05</a><br />Temper <a href="#" onclick="yt.www.watch.player.seekTo(38*60+08);return false;">38:08</a> (First live performance since 1999)<br />CUBErt <a href="#" onclick="yt.www.watch.player.seekTo(41*60+00);return false;">41:00</a><br />Needles <a href="#" onclick="yt.www.watch.player.seekTo(42*60+57);return false;">42:57</a><br />Deer Dance <a href="#" onclick="yt.www.watch.player.seekTo(46*60+27);return false;">46:27</a><br />Bounce <a href="#" onclick="yt.www.watch.player.seekTo(49*60+38);return false;">49:38</a><br />Suggestions <a href="#" onclick="yt.www.watch.player.seekTo(51*60+25);return false;">51:25</a><br />Psycho <a href="#" onclick="yt.www.watch.player.seekTo(53*60+52);return false;">53:52</a><br />Chop Suey! <a href="#" onclick="yt.www.watch.player.seekTo(58*60+13);return false;">58:13</a><br />Lonely Day <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+01*60+15);return false;">1:01:15</a><br />Question! <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+04*60+14);return false;">1:04:14</a><br />Lost in Hollywood <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+08*60+10);return false;">1:08:10</a><br />Vicinity of Obscenity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+13*60+40);return false;">1:13:40</a>(First live performance since 2012)<br />Forest <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+16*60+17);return false;">1:16:17</a><br />Cigaro <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+20*60+02);return false;">1:20:02</a><br />Toxicity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+23*60+57);return false;">1:23:57</a>(with Chino Moreno)<br />Sugar <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+27*60+53);return false;">1:27:53</a>',
|
||||
5640,
|
||||
[{
|
||||
'start_time': 45,
|
||||
'end_time': 266,
|
||||
'title': 'I-E-A-I-A-I-O',
|
||||
}, {
|
||||
'start_time': 266,
|
||||
'end_time': 331,
|
||||
'title': 'Suite-Pee (Incomplete)',
|
||||
}, {
|
||||
'start_time': 331,
|
||||
'end_time': 522,
|
||||
'title': 'Attack (First live performance since 2011)',
|
||||
}, {
|
||||
'start_time': 522,
|
||||
'end_time': 752,
|
||||
'title': 'Prison Song',
|
||||
}, {
|
||||
'start_time': 752,
|
||||
'end_time': 932,
|
||||
'title': 'Know (First live performance since 2011)',
|
||||
}, {
|
||||
'start_time': 932,
|
||||
'end_time': 1153,
|
||||
'title': 'Aerials',
|
||||
}, {
|
||||
'start_time': 1153,
|
||||
'end_time': 1209,
|
||||
'title': 'Soldier Side - Intro',
|
||||
}, {
|
||||
'start_time': 1209,
|
||||
'end_time': 1472,
|
||||
'title': 'B.Y.O.B.',
|
||||
}, {
|
||||
'start_time': 1472,
|
||||
'end_time': 1668,
|
||||
'title': 'Soil',
|
||||
}, {
|
||||
'start_time': 1668,
|
||||
'end_time': 1838,
|
||||
'title': 'Darts',
|
||||
}, {
|
||||
'start_time': 1838,
|
||||
'end_time': 2105,
|
||||
'title': 'Radio/Video',
|
||||
}, {
|
||||
'start_time': 2105,
|
||||
'end_time': 2288,
|
||||
'title': 'Hypnotize',
|
||||
}, {
|
||||
'start_time': 2288,
|
||||
'end_time': 2460,
|
||||
'title': 'Temper (First live performance since 1999)',
|
||||
}, {
|
||||
'start_time': 2460,
|
||||
'end_time': 2577,
|
||||
'title': 'CUBErt',
|
||||
}, {
|
||||
'start_time': 2577,
|
||||
'end_time': 2787,
|
||||
'title': 'Needles',
|
||||
}, {
|
||||
'start_time': 2787,
|
||||
'end_time': 2978,
|
||||
'title': 'Deer Dance',
|
||||
}, {
|
||||
'start_time': 2978,
|
||||
'end_time': 3085,
|
||||
'title': 'Bounce',
|
||||
}, {
|
||||
'start_time': 3085,
|
||||
'end_time': 3232,
|
||||
'title': 'Suggestions',
|
||||
}, {
|
||||
'start_time': 3232,
|
||||
'end_time': 3493,
|
||||
'title': 'Psycho',
|
||||
}, {
|
||||
'start_time': 3493,
|
||||
'end_time': 3675,
|
||||
'title': 'Chop Suey!',
|
||||
}, {
|
||||
'start_time': 3675,
|
||||
'end_time': 3854,
|
||||
'title': 'Lonely Day',
|
||||
}, {
|
||||
'start_time': 3854,
|
||||
'end_time': 4090,
|
||||
'title': 'Question!',
|
||||
}, {
|
||||
'start_time': 4090,
|
||||
'end_time': 4420,
|
||||
'title': 'Lost in Hollywood',
|
||||
}, {
|
||||
'start_time': 4420,
|
||||
'end_time': 4577,
|
||||
'title': 'Vicinity of Obscenity (First live performance since 2012)',
|
||||
}, {
|
||||
'start_time': 4577,
|
||||
'end_time': 4802,
|
||||
'title': 'Forest',
|
||||
}, {
|
||||
'start_time': 4802,
|
||||
'end_time': 5037,
|
||||
'title': 'Cigaro',
|
||||
}, {
|
||||
'start_time': 5037,
|
||||
'end_time': 5273,
|
||||
'title': 'Toxicity (with Chino Moreno)',
|
||||
}, {
|
||||
'start_time': 5273,
|
||||
'end_time': 5640,
|
||||
'title': 'Sugar',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=PkYLQbsqCE8
|
||||
# pattern: <num> - <title> [<latinized title>] 0:00:00
|
||||
'''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.<br /><br />"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions<br />Released on 6 panel digipak CD, limited to 100 copies only<br />And digital format on Bandcamp<br /><br />Tracklist<br /><br />1 - Во прах [Vo prakh] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;">0:00:00</a><br />2 - Искупление [Iskupleniye] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+08*60+10);return false;">0:08:10</a><br />3 - Из серпов луны...[Iz serpov luny] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+14*60+30);return false;">0:14:30</a><br /><br />Links:<br /><a href="https://deathknellprod.bandcamp.com/album/--2" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://deathknellprod.bandcamp.com/album/--2" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://deathknellprod.bandcamp.com/a...</a><br /><a href="https://www.facebook.com/DeathKnellProd/" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://www.facebook.com/DeathKnellProd/" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://www.facebook.com/DeathKnellProd/</a><br /><br /><br />I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.<br /><br />------------------------------------------------------------------<br /><br />Subscribe for more videos like this.<br />My link: <a href="https://web.facebook.com/AttackOfTheDragons" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://web.facebook.com/AttackOfTheDragons" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://web.facebook.com/AttackOfTheD...</a>''',
|
||||
1138,
|
||||
[{
|
||||
'start_time': 0,
|
||||
'end_time': 490,
|
||||
'title': '1 - Во прах [Vo prakh]',
|
||||
}, {
|
||||
'start_time': 490,
|
||||
'end_time': 870,
|
||||
'title': '2 - Искупление [Iskupleniye]',
|
||||
}, {
|
||||
'start_time': 870,
|
||||
'end_time': 1138,
|
||||
'title': '3 - Из серпов луны...[Iz serpov luny]',
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://www.youtube.com/watch?v=xZW70zEasOk
|
||||
# time point more than duration
|
||||
'''● LCS Spring finals: Saturday and Sunday from <a href="#" onclick="yt.www.watch.player.seekTo(13*60+30);return false;">13:30</a> outside the venue! <br />● PAX East: Fri, Sat & Sun - more info in tomorrows video on the main channel!''',
|
||||
283,
|
||||
[]
|
||||
),
|
||||
]
|
||||
|
||||
def test_youtube_chapters(self):
|
||||
for description, duration, expected_chapters in self._TEST_CASES:
|
||||
ie = YoutubeIE()
|
||||
expect_value(
|
||||
self, ie._extract_chapters_from_description(description, duration),
|
||||
expected_chapters, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -12,6 +12,7 @@ from test.helper import FakeYDL
|
||||
|
||||
from youtube_dl.extractor import (
|
||||
YoutubePlaylistIE,
|
||||
YoutubeTabIE,
|
||||
YoutubeIE,
|
||||
)
|
||||
|
||||
@@ -57,14 +58,22 @@ class TestYoutubeLists(unittest.TestCase):
|
||||
entries = result['entries']
|
||||
self.assertEqual(len(entries), 100)
|
||||
|
||||
def test_youtube_flat_playlist_titles(self):
|
||||
def test_youtube_flat_playlist_extraction(self):
|
||||
dl = FakeYDL()
|
||||
dl.params['extract_flat'] = True
|
||||
ie = YoutubePlaylistIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
|
||||
ie = YoutubeTabIE(dl)
|
||||
result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc')
|
||||
self.assertIsPlaylist(result)
|
||||
for entry in result['entries']:
|
||||
self.assertTrue(entry.get('title'))
|
||||
entries = list(result['entries'])
|
||||
self.assertTrue(len(entries) == 1)
|
||||
video = entries[0]
|
||||
self.assertEqual(video['_type'], 'url_transparent')
|
||||
self.assertEqual(video['ie_key'], 'Youtube')
|
||||
self.assertEqual(video['id'], 'BaW_jenozKc')
|
||||
self.assertEqual(video['url'], 'BaW_jenozKc')
|
||||
self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐')
|
||||
self.assertEqual(video['duration'], 10)
|
||||
self.assertEqual(video['uploader'], 'Philipp Hagemeister')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -19,55 +19,46 @@ from youtube_dl.compat import compat_str, compat_urlretrieve
|
||||
_TESTS = [
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
|
||||
'js',
|
||||
86,
|
||||
'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
|
||||
'js',
|
||||
85,
|
||||
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js',
|
||||
'js',
|
||||
90,
|
||||
']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
|
||||
'js',
|
||||
84,
|
||||
'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
|
||||
'js',
|
||||
'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA',
|
||||
'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2',
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js',
|
||||
'js',
|
||||
84,
|
||||
'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js',
|
||||
'js',
|
||||
83,
|
||||
'123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js',
|
||||
'js',
|
||||
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288',
|
||||
'82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B'
|
||||
),
|
||||
(
|
||||
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
|
||||
'js',
|
||||
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
|
||||
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
|
||||
)
|
||||
@@ -78,6 +69,10 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
def test_youtube_extract_player_info(self):
|
||||
PLAYER_URLS = (
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'),
|
||||
('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'),
|
||||
# obsolete
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
||||
@@ -86,13 +81,9 @@ class TestPlayerInfo(unittest.TestCase):
|
||||
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
|
||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
||||
('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
|
||||
('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
|
||||
)
|
||||
for player_url, expected_player_id in PLAYER_URLS:
|
||||
expected_player_type = player_url.split('.')[-1]
|
||||
player_type, player_id = YoutubeIE._extract_player_info(player_url)
|
||||
self.assertEqual(player_type, expected_player_type)
|
||||
player_id = YoutubeIE._extract_player_info(player_url)
|
||||
self.assertEqual(player_id, expected_player_id)
|
||||
|
||||
|
||||
@@ -104,13 +95,13 @@ class TestSignature(unittest.TestCase):
|
||||
os.mkdir(self.TESTDATA_DIR)
|
||||
|
||||
|
||||
def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
def make_tfunc(url, sig_input, expected_sig):
|
||||
m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url)
|
||||
assert m, '%r should follow URL format' % url
|
||||
test_id = m.group(1)
|
||||
|
||||
def test_func(self):
|
||||
basename = 'player-%s.%s' % (test_id, stype)
|
||||
basename = 'player-%s.js' % test_id
|
||||
fn = os.path.join(self.TESTDATA_DIR, basename)
|
||||
|
||||
if not os.path.exists(fn):
|
||||
@@ -118,22 +109,16 @@ def make_tfunc(url, stype, sig_input, expected_sig):
|
||||
|
||||
ydl = FakeYDL()
|
||||
ie = YoutubeIE(ydl)
|
||||
if stype == 'js':
|
||||
with io.open(fn, encoding='utf-8') as testf:
|
||||
jscode = testf.read()
|
||||
func = ie._parse_sig_js(jscode)
|
||||
else:
|
||||
assert stype == 'swf'
|
||||
with open(fn, 'rb') as testf:
|
||||
swfcode = testf.read()
|
||||
func = ie._parse_sig_swf(swfcode)
|
||||
with io.open(fn, encoding='utf-8') as testf:
|
||||
jscode = testf.read()
|
||||
func = ie._parse_sig_js(jscode)
|
||||
src_sig = (
|
||||
compat_str(string.printable[:sig_input])
|
||||
if isinstance(sig_input, int) else sig_input)
|
||||
got_sig = func(src_sig)
|
||||
self.assertEqual(got_sig, expected_sig)
|
||||
|
||||
test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
|
||||
test_func.__name__ = str('test_signature_js_' + test_id)
|
||||
setattr(TestSignature, test_func.__name__, test_func)
|
||||
|
||||
|
||||
|
@@ -1,14 +1,15 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import re
|
||||
import time
|
||||
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AbcNewsVideoIE(AMPIE):
|
||||
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||
(?:
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
(?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/(?:embed|itemfeed)\?.*?\bid=
|
||||
)|
|
||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||
)
|
||||
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||
'duration': 180,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1380454200,
|
||||
'upload_date': '20130929',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
||||
# Youtube Embeds
|
||||
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
|
||||
'info_dict': {
|
||||
'id': '10505354',
|
||||
'ext': 'flv',
|
||||
'display_id': 'dramatic-video-rare-death-job-america',
|
||||
'title': 'Occupational Hazards',
|
||||
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20100428',
|
||||
'timestamp': 1272412800,
|
||||
'id': '51286501',
|
||||
'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
|
||||
'description': 'Billingsley went from a child actor to Hollywood power player.',
|
||||
},
|
||||
'add_ie': ['AbcNewsVideo'],
|
||||
'playlist_count': 5,
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||
'info_dict': {
|
||||
'id': '38897857',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||
'upload_date': '20160515',
|
||||
'timestamp': 1463329500,
|
||||
'upload_date': '20160505',
|
||||
'timestamp': 1462442280,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# inline.type == 'video'
|
||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
story_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, story_id)
|
||||
story = self._parse_json(self._search_regex(
|
||||
r"window\['__abcnews__'\]\s*=\s*({.+?});",
|
||||
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
|
||||
article_contents = story.get('articleContents') or {}
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
||||
def entries():
|
||||
featured_video = story.get('featuredVideo') or {}
|
||||
feed = try_get(featured_video, lambda x: x['video']['feed'])
|
||||
if feed:
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': featured_video.get('id'),
|
||||
'title': featured_video.get('name'),
|
||||
'url': feed,
|
||||
'thumbnail': featured_video.get('images'),
|
||||
'description': featured_video.get('description'),
|
||||
'timestamp': parse_iso8601(featured_video.get('uploadDate')),
|
||||
'duration': parse_duration(featured_video.get('duration')),
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
}
|
||||
|
||||
youtube_url = YoutubeIE._extract_url(webpage)
|
||||
for inline in (article_contents.get('inlines') or []):
|
||||
inline_type = inline.get('type')
|
||||
if inline_type == 'iframe':
|
||||
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
|
||||
if iframe_url:
|
||||
yield self.url_result(iframe_url)
|
||||
elif inline_type == 'video':
|
||||
video_id = inline.get('id')
|
||||
if video_id:
|
||||
yield {
|
||||
'_type': 'url',
|
||||
'id': video_id,
|
||||
'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
|
||||
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
|
||||
'description': inline.get('description'),
|
||||
'duration': parse_duration(inline.get('duration')),
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
}
|
||||
|
||||
timestamp = None
|
||||
date_str = self._html_search_regex(
|
||||
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
||||
webpage, 'timestamp', fatal=False)
|
||||
if date_str:
|
||||
tz_offset = 0
|
||||
if date_str.endswith(' ET'): # Eastern Time
|
||||
tz_offset = -5
|
||||
date_str = date_str[:-3]
|
||||
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
||||
for date_format in date_formats:
|
||||
try:
|
||||
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
||||
except ValueError:
|
||||
continue
|
||||
if timestamp is not None:
|
||||
timestamp -= tz_offset * 3600
|
||||
|
||||
entry = {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||
'url': full_video_url,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
|
||||
if youtube_url:
|
||||
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
return entry
|
||||
return self.playlist_result(
|
||||
entries(), story_id, article_contents.get('headline'),
|
||||
article_contents.get('subHead'))
|
||||
|
@@ -26,6 +26,7 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
|
||||
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
@@ -129,19 +133,42 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
}])
|
||||
return subtitles
|
||||
|
||||
def _real_initialize(self):
|
||||
username, password = self._get_login_info()
|
||||
if not username:
|
||||
return
|
||||
try:
|
||||
access_token = (self._download_json(
|
||||
self._API_BASE_URL + 'authentication/login', None,
|
||||
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||
data=urlencode_postdata({
|
||||
'password': password,
|
||||
'rememberMe': False,
|
||||
'source': 'Web',
|
||||
'username': username,
|
||||
})) or {}).get('accessToken')
|
||||
if access_token:
|
||||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata')['player']
|
||||
'Downloading player config JSON metadata',
|
||||
headers=self._HEADERS)['player']
|
||||
options = player['options']
|
||||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
raise ExtractorError(
|
||||
'This video is only available for paying users', expected=True)
|
||||
# self.raise_login_required() # FIXME: Login is not implemented
|
||||
self.raise_login_required()
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
@@ -188,8 +215,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
else:
|
||||
raise ExtractorError(message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
raise ExtractorError('Giving up retrying')
|
||||
|
||||
|
@@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||
'info_dict': {
|
||||
'id': 'SH012427480000',
|
||||
'id': 'SERIES1574',
|
||||
'title': 'Ancient Aliens',
|
||||
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||
},
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -6,25 +6,21 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class APAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||
'info_dict': {
|
||||
'id': 'jjv85FdZ',
|
||||
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'ext': 'mp4',
|
||||
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 254,
|
||||
'timestamp': 1519211149,
|
||||
'upload_date': '20180221',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||
@@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
|
||||
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
sources = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
description = extract('description')
|
||||
thumbnail = extract('poster', 'thumbnail')
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = url_or_none(source.get('file'))
|
||||
for format_id in ('hls', 'progressive'):
|
||||
source_url = url_or_none(extract(format_id))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
@@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)\.mp4', source_url, 'height', default=None))
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -2,15 +2,17 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ArchiveOrgIE(InfoExtractor):
|
||||
IE_NAME = 'archive.org'
|
||||
IE_DESC = 'archive.org videos'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||
@@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'ext': 'ogg',
|
||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||
'upload_date': '19681210',
|
||||
'uploader': 'SRI International'
|
||||
'creator': 'SRI International',
|
||||
'release_date': '19681210',
|
||||
'uploader': 'SRI International',
|
||||
'timestamp': 1268695290,
|
||||
'upload_date': '20100315',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
@@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'id': 'Cops1922',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
|
||||
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
|
||||
'timestamp': 1387699629,
|
||||
'upload_date': '20131222',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://archive.org/embed/' + video_id, video_id)
|
||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist'), video_id)
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
|
||||
playlist = None
|
||||
play8 = self._search_regex(
|
||||
r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
|
||||
'playlist', default=None)
|
||||
if play8:
|
||||
attrs = extract_attributes(play8)
|
||||
playlist = attrs.get('value')
|
||||
if not playlist:
|
||||
# Old jwplayer fallback
|
||||
playlist = self._search_regex(
|
||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||
webpage, 'jwplayer playlist', default='[]')
|
||||
jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
|
||||
if jwplayer_playlist:
|
||||
info = self._parse_jwplayer_data(
|
||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||
else:
|
||||
# HTML5 media fallback
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
info['id'] = video_id
|
||||
|
||||
def get_optional(metadata, field):
|
||||
return metadata.get(field, [None])[0]
|
||||
@@ -58,8 +84,12 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': clean_html(get_optional(metadata, 'description')),
|
||||
})
|
||||
if info.get('_type') != 'playlist':
|
||||
creator = get_optional(metadata, 'creator')
|
||||
info.update({
|
||||
'uploader': get_optional(metadata, 'creator'),
|
||||
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
'creator': creator,
|
||||
'release_date': unified_strdate(get_optional(metadata, 'date')),
|
||||
'uploader': get_optional(metadata, 'publisher') or creator,
|
||||
'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
|
||||
'language': get_optional(metadata, 'language'),
|
||||
})
|
||||
return info
|
||||
|
@@ -284,20 +284,42 @@ class ARDIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for a in video_node.findall('.//asset'):
|
||||
file_name = xpath_text(a, './fileName', default=None)
|
||||
if not file_name:
|
||||
continue
|
||||
format_type = a.attrib.get('type')
|
||||
format_url = url_or_none(file_name)
|
||||
if format_url:
|
||||
ext = determine_ext(file_name)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_type or 'hls', fatal=False))
|
||||
continue
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(format_url, {'hdcore': '3.7.0'}),
|
||||
display_id, f4m_id=format_type or 'hds', fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'format_id': a.attrib['type'],
|
||||
'width': int_or_none(a.find('./frameWidth').text),
|
||||
'height': int_or_none(a.find('./frameHeight').text),
|
||||
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
||||
'abr': int_or_none(a.find('./bitrateAudio').text),
|
||||
'vcodec': a.find('./codecVideo').text,
|
||||
'tbr': int_or_none(a.find('./totalBitrate').text),
|
||||
'format_id': format_type,
|
||||
'width': int_or_none(xpath_text(a, './frameWidth')),
|
||||
'height': int_or_none(xpath_text(a, './frameHeight')),
|
||||
'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
|
||||
'abr': int_or_none(xpath_text(a, './bitrateAudio')),
|
||||
'vcodec': xpath_text(a, './codecVideo'),
|
||||
'tbr': int_or_none(xpath_text(a, './totalBitrate')),
|
||||
}
|
||||
if a.find('./serverPrefix').text:
|
||||
f['url'] = a.find('./serverPrefix').text
|
||||
f['playpath'] = a.find('./fileName').text
|
||||
server_prefix = xpath_text(a, './serverPrefix', default=None)
|
||||
if server_prefix:
|
||||
f.update({
|
||||
'url': server_prefix,
|
||||
'playpath': file_name,
|
||||
})
|
||||
else:
|
||||
f['url'] = a.find('./fileName').text
|
||||
if not format_url:
|
||||
continue
|
||||
f['url'] = format_url
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||
'is_live': is_live,
|
||||
'uploader_id': video_data.get('user_id'),
|
||||
}
|
||||
|
||||
|
||||
@@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'upload_date': '20150107',
|
||||
'timestamp': 1420588800,
|
||||
'uploader_id': '71',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
|
@@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'md5': '670b2d73f48549da032861130488c681',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
'upload_date': '20150723',
|
||||
'timestamp': 1437679032,
|
||||
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Unable to download f4m manifest'
|
||||
]
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||
@@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||
@@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
|
||||
tp_path = release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||
if site == 'bravotv':
|
||||
site = 'bravo'
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
||||
adobe_pass.get('adobePassResourceId') or site,
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
||||
url, release_pid,
|
||||
adobe_pass.get('adobePassRequestorId') or site, resource)
|
||||
else:
|
||||
shared_playlist = settings['ls_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
|
@@ -7,19 +7,21 @@ from .common import InfoExtractor
|
||||
from .gigya import GigyaBaseIE
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
strip_or_none,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||
@@ -332,3 +334,51 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'display_id': display_id,
|
||||
'season_number': int_or_none(page.get('episode_season')),
|
||||
})
|
||||
|
||||
|
||||
class DagelijkseKostIE(InfoExtractor):
|
||||
IE_DESC = 'dagelijksekost.een.be'
|
||||
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
|
||||
'md5': '30bfffc323009a3e5f689bef6efa2365',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
|
||||
'display_id': 'hachis-parmentier-met-witloof',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hachis parmentier met witloof',
|
||||
'description': 'md5:9960478392d87f63567b5b117688cdc5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 283.02,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = strip_or_none(get_element_by_class(
|
||||
'dish-metadata__title', webpage
|
||||
) or self._html_search_meta(
|
||||
'twitter:title', webpage))
|
||||
|
||||
description = clean_html(get_element_by_class(
|
||||
'dish-description', webpage)
|
||||
) or self._html_search_meta(
|
||||
('description', 'twitter:description', 'og:description'),
|
||||
webpage)
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||
group='id')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
@@ -1,15 +1,18 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import calendar
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_timezone,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -24,8 +27,9 @@ class CCMAIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'L\'espot de La Marató de TV3',
|
||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||
'timestamp': 1470918540,
|
||||
'upload_date': '20160811',
|
||||
'timestamp': 1478608140,
|
||||
'upload_date': '20161108',
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||
@@ -35,8 +39,24 @@ class CCMAIE(InfoExtractor):
|
||||
'ext': 'mp3',
|
||||
'title': 'El Consell de Savis analitza el derbi',
|
||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||
'upload_date': '20171205',
|
||||
'timestamp': 1512507300,
|
||||
'upload_date': '20170512',
|
||||
'timestamp': 1494622500,
|
||||
'vcodec': 'none',
|
||||
'categories': ['Esports'],
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||
'info_dict': {
|
||||
'id': '6031387',
|
||||
'ext': 'mp4',
|
||||
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||
'timestamp': 1582577700,
|
||||
'upload_date': '20200224',
|
||||
'subtitles': 'mincount:4',
|
||||
'age_limit': 16,
|
||||
'series': 'Crims',
|
||||
}
|
||||
}]
|
||||
|
||||
@@ -72,17 +92,28 @@ class CCMAIE(InfoExtractor):
|
||||
|
||||
informacio = media['informacio']
|
||||
title = informacio['titol']
|
||||
durada = informacio.get('durada', {})
|
||||
durada = informacio.get('durada') or {}
|
||||
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
||||
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
|
||||
tematica = try_get(informacio, lambda x: x['tematica']['text'])
|
||||
|
||||
timestamp = None
|
||||
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
||||
try:
|
||||
timezone, data_utc = extract_timezone(data_utc)
|
||||
timestamp = calendar.timegm((datetime.datetime.strptime(
|
||||
data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
subtitles = {}
|
||||
subtitols = media.get('subtitols', {})
|
||||
if subtitols:
|
||||
sub_url = subtitols.get('url')
|
||||
subtitols = media.get('subtitols') or []
|
||||
if isinstance(subtitols, dict):
|
||||
subtitols = [subtitols]
|
||||
for st in subtitols:
|
||||
sub_url = st.get('url')
|
||||
if sub_url:
|
||||
subtitles.setdefault(
|
||||
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
|
||||
st.get('iso') or st.get('text') or 'ca', []).append({
|
||||
'url': sub_url,
|
||||
})
|
||||
|
||||
@@ -97,6 +128,16 @@ class CCMAIE(InfoExtractor):
|
||||
'height': int_or_none(imatges.get('alcada')),
|
||||
}]
|
||||
|
||||
age_limit = None
|
||||
codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
|
||||
if codi_etic:
|
||||
codi_etic_s = codi_etic.split('_')
|
||||
if len(codi_etic_s) == 2:
|
||||
if codi_etic_s[1] == 'TP':
|
||||
age_limit = 0
|
||||
else:
|
||||
age_limit = int_or_none(codi_etic_s[1])
|
||||
|
||||
return {
|
||||
'id': media_id,
|
||||
'title': title,
|
||||
@@ -106,4 +147,9 @@ class CCMAIE(InfoExtractor):
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'age_limit': age_limit,
|
||||
'alt_title': informacio.get('titol_complet'),
|
||||
'episode_number': int_or_none(informacio.get('capitol')),
|
||||
'categories': [tematica] if tematica else None,
|
||||
'series': informacio.get('programa'),
|
||||
}
|
||||
|
@@ -95,8 +95,11 @@ class CDAIE(InfoExtractor):
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
if re.search(r'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage):
|
||||
self.raise_geo_restricted()
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
|
@@ -2064,7 +2064,7 @@ class InfoExtractor(object):
|
||||
})
|
||||
return entries
|
||||
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
|
||||
def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
res = self._download_xml_handle(
|
||||
mpd_url, video_id,
|
||||
note=note or 'Downloading MPD manifest',
|
||||
@@ -2078,10 +2078,9 @@ class InfoExtractor(object):
|
||||
mpd_base_url = base_url(urlh.geturl())
|
||||
|
||||
return self._parse_mpd_formats(
|
||||
mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
|
||||
formats_dict=formats_dict, mpd_url=mpd_url)
|
||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
|
||||
def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
@@ -2359,15 +2358,7 @@ class InfoExtractor(object):
|
||||
else:
|
||||
# Assuming direct URL to unfragmented media.
|
||||
f['url'] = base_url
|
||||
|
||||
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
|
||||
# is not necessarily unique within a Period thus formats with
|
||||
# the same `format_id` are quite possible. There are numerous examples
|
||||
# of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/13919)
|
||||
full_info = formats_dict.get(representation_id, {}).copy()
|
||||
full_info.update(f)
|
||||
formats.append(full_info)
|
||||
formats.append(f)
|
||||
else:
|
||||
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
||||
return formats
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -10,11 +11,13 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
(?:www\.)?(?P<host>d
|
||||
@@ -24,7 +27,7 @@ class DPlayIE(InfoExtractor):
|
||||
)
|
||||
)|
|
||||
(?P<subdomain_country>es|it)\.dplay\.com
|
||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||
)/[^/]+''' + _PATH_REGEX
|
||||
|
||||
_TESTS = [{
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
@@ -151,56 +154,79 @@ class DPlayIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _process_errors(self, e, geo_countries):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code in ('access.denied.missingpackage', 'invalid.token'):
|
||||
raise ExtractorError(
|
||||
'This video is only available for registered users. You may want to use --cookies.', expected=True)
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['Authorization'] = 'Bearer ' + self._download_json(
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
video_id, headers=headers)['data']['attributes']['streaming']
|
||||
streaming_list = []
|
||||
for format_id, format_dict in streaming.items():
|
||||
streaming_list.append({
|
||||
'type': format_id,
|
||||
'url': format_dict.get('url'),
|
||||
})
|
||||
return streaming_list
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||
geo_countries = [country.upper()]
|
||||
self._initialize_geo_bypass({
|
||||
'countries': geo_countries,
|
||||
})
|
||||
disco_base = 'https://%s/' % disco_host
|
||||
token = self._download_json(
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Authorization': 'Bearer ' + token,
|
||||
}
|
||||
video = self._download_json(
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'fields[channel]': 'name',
|
||||
'fields[image]': 'height,src,width',
|
||||
'fields[show]': 'name',
|
||||
'fields[tag]': 'name',
|
||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
self._update_disco_api_headers(headers, disco_base, display_id, realm)
|
||||
try:
|
||||
video = self._download_json(
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'fields[channel]': 'name',
|
||||
'fields[image]': 'height,src,width',
|
||||
'fields[show]': 'name',
|
||||
'fields[tag]': 'name',
|
||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name'].strip()
|
||||
formats = []
|
||||
try:
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
display_id, headers=headers)['data']['attributes']['streaming']
|
||||
streaming = self._download_video_playback_info(
|
||||
disco_base, video_id, headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code == 'access.denied.missingpackage':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
self._process_errors(e, geo_countries)
|
||||
raise
|
||||
for format_id, format_dict in streaming.items():
|
||||
for format_dict in streaming:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -248,7 +274,7 @@ class DPlayIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'description': strip_or_none(info.get('description')),
|
||||
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
@@ -268,3 +294,75 @@ class DPlayIE(InfoExtractor):
|
||||
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
||||
|
||||
class DiscoveryPlusIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||
'info_dict': {
|
||||
'id': '1140794',
|
||||
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||
'ext': 'mp4',
|
||||
'title': 'Food and Family',
|
||||
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||
'duration': 2583.113,
|
||||
'timestamp': 1609304400,
|
||||
'upload_date': '20201230',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Property Brothers: Forever Home',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}]
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
|
||||
|
||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||
return self._download_json(
|
||||
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
|
||||
|
||||
|
||||
class HGTVDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
|
||||
_TESTS = [{
|
||||
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||
'info_dict': {
|
||||
'id': '151205',
|
||||
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wer braucht schon eine Toilette',
|
||||
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||
'duration': 1177.024,
|
||||
'timestamp': 1595705400,
|
||||
'upload_date': '20200725',
|
||||
'creator': 'HGTV',
|
||||
'series': 'Tiny House - klein, aber oho',
|
||||
'season_number': 3,
|
||||
'episode_number': 3,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||
|
@@ -12,7 +12,14 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class EggheadCourseIE(InfoExtractor):
|
||||
class EggheadBaseIE(InfoExtractor):
|
||||
def _call_api(self, path, video_id, resource, fatal=True):
|
||||
return self._download_json(
|
||||
'https://app.egghead.io/api/v1/' + path,
|
||||
video_id, 'Downloading %s JSON' % resource, fatal=fatal)
|
||||
|
||||
|
||||
class EggheadCourseIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io course'
|
||||
IE_NAME = 'egghead:course'
|
||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||
@@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
lessons = self._download_json(
|
||||
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
|
||||
playlist_id, 'Downloading course lessons JSON')
|
||||
series_path = 'series/' + playlist_id
|
||||
lessons = self._call_api(
|
||||
series_path + '/lessons', playlist_id, 'course lessons')
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
@@ -44,9 +50,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
entries.append(self.url_result(
|
||||
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
|
||||
|
||||
course = self._download_json(
|
||||
'https://egghead.io/api/v1/series/%s' % playlist_id,
|
||||
playlist_id, 'Downloading course JSON', fatal=False) or {}
|
||||
course = self._call_api(
|
||||
series_path, playlist_id, 'course', False) or {}
|
||||
|
||||
playlist_id = course.get('id')
|
||||
if playlist_id:
|
||||
@@ -57,7 +62,7 @@ class EggheadCourseIE(InfoExtractor):
|
||||
course.get('description'))
|
||||
|
||||
|
||||
class EggheadLessonIE(InfoExtractor):
|
||||
class EggheadLessonIE(EggheadBaseIE):
|
||||
IE_DESC = 'egghead.io lesson'
|
||||
IE_NAME = 'egghead:lesson'
|
||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||
@@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
|
||||
'upload_date': '20161209',
|
||||
'duration': 304,
|
||||
'view_count': 0,
|
||||
'tags': ['javascript', 'free'],
|
||||
'tags': 'count:2',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
lesson = self._download_json(
|
||||
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
|
||||
lesson = self._call_api(
|
||||
'lessons/' + display_id, display_id, 'lesson')
|
||||
|
||||
lesson_id = compat_str(lesson['id'])
|
||||
title = lesson['title']
|
||||
|
@@ -163,6 +163,7 @@ from .canvas import (
|
||||
CanvasIE,
|
||||
CanvasEenIE,
|
||||
VrtNUIE,
|
||||
DagelijkseKostIE,
|
||||
)
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
@@ -287,7 +288,11 @@ from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
)
|
||||
from .dplay import DPlayIE
|
||||
from .dplay import (
|
||||
DPlayIE,
|
||||
DiscoveryPlusIE,
|
||||
HGTVDeIE,
|
||||
)
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@@ -416,6 +421,7 @@ from .gamestar import GameStarIE
|
||||
from .gaskrank import GaskrankIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .gedidigital import GediDigitalIE
|
||||
from .generic import GenericIE
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
@@ -470,8 +476,8 @@ from .hungama import (
|
||||
from .hypem import HypemIE
|
||||
from .ign import (
|
||||
IGNIE,
|
||||
OneUPIE,
|
||||
PCMagIE,
|
||||
IGNVideoIE,
|
||||
IGNArticleIE,
|
||||
)
|
||||
from .iheart import (
|
||||
IHeartRadioIE,
|
||||
@@ -1024,6 +1030,7 @@ from .safari import (
|
||||
SafariApiIE,
|
||||
SafariCourseIE,
|
||||
)
|
||||
from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
@@ -1056,6 +1063,11 @@ from .shared import (
|
||||
VivoIE,
|
||||
)
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
from .simplecast import (
|
||||
SimplecastIE,
|
||||
SimplecastEpisodeIE,
|
||||
SimplecastPodcastIE,
|
||||
)
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skyit import (
|
||||
@@ -1140,6 +1152,11 @@ from .srgssr import (
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .storyfire import (
|
||||
StoryFireIE,
|
||||
StoryFireUserIE,
|
||||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
@@ -1260,6 +1277,7 @@ from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
KatsomoIE,
|
||||
MTVUutisetArticleIE,
|
||||
)
|
||||
from .tv2dk import (
|
||||
TV2DKIE,
|
||||
@@ -1398,7 +1416,6 @@ from .vidme import (
|
||||
VidmeUserIE,
|
||||
VidmeUserLikesIE,
|
||||
)
|
||||
from .vidzi import VidziIE
|
||||
from .vier import VierIE, VierVideosIE
|
||||
from .viewlift import (
|
||||
ViewLiftIE,
|
||||
@@ -1458,6 +1475,7 @@ from .vrv import (
|
||||
VRVSeriesIE,
|
||||
)
|
||||
from .vshare import VShareIE
|
||||
from .vtm import VTMIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .vube import VubeIE
|
||||
from .vuclip import VuClipIE
|
||||
@@ -1601,5 +1619,6 @@ from .zattoo import (
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
161
youtube_dl/extractor/gedidigital.py
Normal file
161
youtube_dl/extractor/gedidigital.py
Normal file
@@ -0,0 +1,161 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class GediDigitalIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://video\.
|
||||
(?:
|
||||
(?:
|
||||
(?:espresso\.)?repubblica
|
||||
|lastampa
|
||||
|ilsecoloxix
|
||||
)|
|
||||
(?:
|
||||
iltirreno
|
||||
|messaggeroveneto
|
||||
|ilpiccolo
|
||||
|gazzettadimantova
|
||||
|mattinopadova
|
||||
|laprovinciapavese
|
||||
|tribunatreviso
|
||||
|nuovavenezia
|
||||
|gazzettadimodena
|
||||
|lanuovaferrara
|
||||
|corrierealpi
|
||||
|lasentinella
|
||||
)\.gelocal
|
||||
)\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
|
||||
'md5': '84658d7fb9e55a6e57ecc77b73137494',
|
||||
'info_dict': {
|
||||
'id': '121559',
|
||||
'ext': 'mp4',
|
||||
'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
|
||||
'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
|
||||
'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_meta(
|
||||
['twitter:title', 'og:title'], webpage, fatal=True)
|
||||
player_data = re.findall(
|
||||
r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
|
||||
webpage)
|
||||
|
||||
formats = []
|
||||
duration = thumb = None
|
||||
for t, n, v in player_data:
|
||||
if t == 'format':
|
||||
if n in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
|
||||
continue
|
||||
elif n.endswith('-vod-ak'):
|
||||
formats.extend(self._extract_akamai_formats(
|
||||
v, video_id, {'http': 'media.gedidigital.it'}))
|
||||
else:
|
||||
ext = determine_ext(v)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
v, video_id, 'mp4', 'm3u8_native', m3u8_id=n, fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'format_id': n,
|
||||
'url': v,
|
||||
}
|
||||
if ext == 'mp3':
|
||||
abr = int_or_none(self._search_regex(
|
||||
r'-mp3-audio-(\d+)', v, 'abr', default=None))
|
||||
f.update({
|
||||
'abr': abr,
|
||||
'tbr': abr,
|
||||
'vcodec': 'none'
|
||||
})
|
||||
else:
|
||||
mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n)
|
||||
if mobj:
|
||||
f.update({
|
||||
'height': int(mobj.group(1)),
|
||||
'vbr': int_or_none(mobj.group(2)),
|
||||
})
|
||||
if not f.get('vbr'):
|
||||
f['vbr'] = int_or_none(self._search_regex(
|
||||
r'-video-rrtv-(\d+)', v, 'abr', default=None))
|
||||
formats.append(f)
|
||||
elif t == 'param':
|
||||
if n in ['image_full', 'image']:
|
||||
thumb = v
|
||||
elif n == 'videoDuration':
|
||||
duration = int_or_none(v)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta(
|
||||
['twitter:description', 'og:description', 'description'], webpage),
|
||||
'thumbnail': thumb or self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': duration,
|
||||
}
|
@@ -128,6 +128,8 @@ from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
from .kinja import KinjaEmbedIE
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .medialaan import MedialaanIE
|
||||
from .simplecast import SimplecastIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2223,6 +2225,29 @@ class GenericIE(InfoExtractor):
|
||||
'duration': 1581,
|
||||
},
|
||||
},
|
||||
{
|
||||
# MyChannels SDK embed
|
||||
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
|
||||
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
|
||||
'md5': '90c0699c37006ef18e198c032d81739c',
|
||||
'info_dict': {
|
||||
'id': '194165',
|
||||
'ext': 'mp4',
|
||||
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
|
||||
'timestamp': 1611740340,
|
||||
'upload_date': '20210127',
|
||||
'duration': 159,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Simplecast player embed
|
||||
'url': 'https://www.bio.org/podcast',
|
||||
'info_dict': {
|
||||
'id': 'podcast',
|
||||
'title': 'I AM BIO Podcast | BIO',
|
||||
},
|
||||
'playlist_mincount': 52,
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
@@ -2462,6 +2487,9 @@ class GenericIE(InfoExtractor):
|
||||
webpage = self._webpage_read_content(
|
||||
full_response, url, video_id, prefix=first_bytes)
|
||||
|
||||
if '<title>DPG Media Privacy Gate</title>' in webpage:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||
@@ -2593,6 +2621,11 @@ class GenericIE(InfoExtractor):
|
||||
if arc_urls:
|
||||
return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key())
|
||||
|
||||
mychannels_urls = MedialaanIE._extract_urls(webpage)
|
||||
if mychannels_urls:
|
||||
return self.playlist_from_matches(
|
||||
mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2769,6 +2802,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
|
||||
|
||||
# Look for Simplecast embeds
|
||||
simplecast_urls = SimplecastIE._extract_urls(webpage)
|
||||
if simplecast_urls:
|
||||
return self.playlist_from_matches(
|
||||
simplecast_urls, video_id, video_title)
|
||||
|
||||
# Look for BBC iPlayer embed
|
||||
matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
|
||||
if matches:
|
||||
|
@@ -7,6 +7,7 @@ from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
try_get,
|
||||
@@ -237,7 +238,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||
if confirmation_webpage:
|
||||
confirm = self._search_regex(
|
||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||
'confirmation code', fatal=False)
|
||||
'confirmation code', default=None)
|
||||
if confirm:
|
||||
confirmed_source_url = update_url_query(source_url, {
|
||||
'confirm': confirm,
|
||||
@@ -245,6 +246,11 @@ class GoogleDriveIE(InfoExtractor):
|
||||
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||
if urlh and urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(urlh)
|
||||
else:
|
||||
self.report_warning(
|
||||
get_element_by_class('uc-error-subcaption', confirmation_webpage)
|
||||
or get_element_by_class('uc-error-caption', confirmation_webpage)
|
||||
or 'unable to extract confirmation code')
|
||||
|
||||
if not formats and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
@@ -3,230 +3,255 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class IGNIE(InfoExtractor):
|
||||
class IGNBaseIE(InfoExtractor):
|
||||
def _call_api(self, slug):
|
||||
return self._download_json(
|
||||
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
||||
|
||||
|
||||
class IGNIE(IGNBaseIE):
|
||||
"""
|
||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||
Some videos of it.ign.com are also supported
|
||||
"""
|
||||
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
|
||||
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
||||
IE_NAME = 'ign.com'
|
||||
_PAGE_TYPE = 'video'
|
||||
|
||||
_API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
|
||||
_EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||
'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
|
||||
'info_dict': {
|
||||
'id': '8f862beef863986b2785559b9e1aa599',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Last of Us Review',
|
||||
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||
'timestamp': 1370440800,
|
||||
'upload_date': '20130605',
|
||||
'uploader_id': 'cberidon@ign.com',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
'info_dict': {
|
||||
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '5ebbd138523268b93c9141af17bec937',
|
||||
'ext': 'mp4',
|
||||
'title': 'GTA 5 Video Review',
|
||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||
'timestamp': 1379339880,
|
||||
'upload_date': '20130916',
|
||||
'uploader_id': 'danieljkrupa@gmail.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||
'ext': 'mp4',
|
||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||
'timestamp': 1386878820,
|
||||
'upload_date': '20131212',
|
||||
'uploader_id': 'togilvie@ign.com',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'md5': '618fedb9c901fd086f6f093564ef8558',
|
||||
'info_dict': {
|
||||
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
|
||||
'timestamp': 1408047180,
|
||||
'upload_date': '20140814',
|
||||
'uploader_id': 'jamesduggan1990@gmail.com',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# videoId pattern
|
||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _find_video_id(self, webpage):
|
||||
res_id = [
|
||||
r'"video_id"\s*:\s*"(.*?)"',
|
||||
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
r'videoId"\s*:\s*"(.+?)"',
|
||||
r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
|
||||
]
|
||||
return self._search_regex(res_id, webpage, 'video id', default=None)
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||
'md5': 'd2e1586d9987d40fad7867bf96a018ea',
|
||||
'info_dict': {
|
||||
'id': '8f862beef863986b2785559b9e1aa599',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Last of Us Review',
|
||||
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||
'timestamp': 1370440800,
|
||||
'upload_date': '20130605',
|
||||
'tags': 'count:9',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||
'info_dict': {
|
||||
'id': 'ee10d774b508c9b8ec07e763b9125b91',
|
||||
'ext': 'mp4',
|
||||
'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
|
||||
'description': 'md5:817a20299de610bd56f13175386da6fa',
|
||||
'timestamp': 1420571160,
|
||||
'upload_date': '20150106',
|
||||
'tags': 'count:4',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name_or_id = mobj.group('name_or_id')
|
||||
page_type = mobj.group('type')
|
||||
webpage = self._download_webpage(url, name_or_id)
|
||||
if page_type != 'video':
|
||||
multiple_urls = re.findall(
|
||||
r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
|
||||
webpage)
|
||||
if multiple_urls:
|
||||
entries = [self.url_result(u, ie='IGN') for u in multiple_urls]
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': name_or_id,
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
video_id = self._find_video_id(webpage)
|
||||
if not video_id:
|
||||
return self.url_result(self._search_regex(
|
||||
self._EMBED_RE, webpage, 'embed url'))
|
||||
return self._get_video_info(video_id)
|
||||
|
||||
def _get_video_info(self, video_id):
|
||||
api_data = self._download_json(
|
||||
self._API_URL_TEMPLATE % video_id, video_id)
|
||||
display_id = self._match_id(url)
|
||||
video = self._call_api(display_id)
|
||||
video_id = video['videoId']
|
||||
metadata = video['metadata']
|
||||
title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
|
||||
|
||||
formats = []
|
||||
m3u8_url = api_data['refs'].get('m3uUrl')
|
||||
refs = video.get('refs') or {}
|
||||
|
||||
m3u8_url = refs.get('m3uUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
f4m_url = api_data['refs'].get('f4mUrl')
|
||||
|
||||
f4m_url = refs.get('f4mUrl')
|
||||
if f4m_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
for asset in api_data['assets']:
|
||||
|
||||
for asset in (video.get('assets') or []):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': asset['url'],
|
||||
'tbr': asset.get('actual_bitrate_kbps'),
|
||||
'fps': asset.get('frame_rate'),
|
||||
'url': asset_url,
|
||||
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
||||
'fps': int_or_none(asset.get('frame_rate')),
|
||||
'height': int_or_none(asset.get('height')),
|
||||
'width': int_or_none(asset.get('width')),
|
||||
})
|
||||
|
||||
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
|
||||
if mezzanine_url:
|
||||
formats.append({
|
||||
'ext': determine_ext(mezzanine_url, 'mp4'),
|
||||
'format_id': 'mezzanine',
|
||||
'preference': 1,
|
||||
'url': mezzanine_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = [{
|
||||
'url': thumbnail['url']
|
||||
} for thumbnail in api_data.get('thumbnails', [])]
|
||||
thumbnails = []
|
||||
for thumbnail in (video.get('thumbnails') or []):
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
})
|
||||
|
||||
metadata = api_data['metadata']
|
||||
tags = []
|
||||
for tag in (video.get('tags') or []):
|
||||
display_name = tag.get('displayName')
|
||||
if not display_name:
|
||||
continue
|
||||
tags.append(display_name)
|
||||
|
||||
return {
|
||||
'id': api_data.get('videoId') or video_id,
|
||||
'title': metadata.get('longTitle') or metadata.get('name') or metadata.get['title'],
|
||||
'description': metadata.get('description'),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(metadata.get('description')),
|
||||
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
||||
'duration': int_or_none(metadata.get('duration')),
|
||||
'display_id': metadata.get('slug') or video_id,
|
||||
'uploader_id': metadata.get('creator'),
|
||||
'display_id': display_id,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
|
||||
class OneUPIE(IGNIE):
|
||||
_VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
|
||||
IE_NAME = '1up.com'
|
||||
|
||||
class IGNVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
||||
_TESTS = [{
|
||||
'url': 'http://gamevideos.1up.com/video/id/34976.html',
|
||||
'md5': 'c9cc69e07acb675c31a16719f909e347',
|
||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||
'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
|
||||
'info_dict': {
|
||||
'id': '34976',
|
||||
'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sniper Elite V2 - Trailer',
|
||||
'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
|
||||
'timestamp': 1313099220,
|
||||
'upload_date': '20110811',
|
||||
'uploader_id': 'IGN',
|
||||
'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
|
||||
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
||||
'timestamp': 1444665600,
|
||||
'upload_date': '20151012',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Twitter embed
|
||||
'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Vimeo embed
|
||||
'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
result = super(OneUPIE, self)._real_extract(url)
|
||||
result['id'] = mobj.group('name_or_id')
|
||||
return result
|
||||
video_id = self._match_id(url)
|
||||
req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
|
||||
url = self._request_webpage(req, video_id).geturl()
|
||||
ign_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
|
||||
if ign_url:
|
||||
return self.url_result(ign_url, IGNIE.ie_key())
|
||||
return self.url_result(url)
|
||||
|
||||
|
||||
class PCMagIE(IGNIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
|
||||
IE_NAME = 'pcmag'
|
||||
|
||||
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
|
||||
|
||||
class IGNArticleIE(IGNBaseIE):
|
||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||
_PAGE_TYPE = 'article'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||
'md5': '212d6154fd0361a2781075f1febbe9ad',
|
||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||
'info_dict': {
|
||||
'id': 'ee10d774b508c9b8ec07e763b9125b91',
|
||||
'ext': 'mp4',
|
||||
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
|
||||
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
|
||||
'timestamp': 1420571160,
|
||||
'upload_date': '20150106',
|
||||
'uploader_id': 'cozzipix@gmail.com',
|
||||
}
|
||||
'id': '524497489e4e8ff5848ece34',
|
||||
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||
},
|
||||
'playlist': [
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '5ebbd138523268b93c9141af17bec937',
|
||||
'ext': 'mp4',
|
||||
'title': 'GTA 5 Video Review',
|
||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||
'timestamp': 1379339880,
|
||||
'upload_date': '20130916',
|
||||
},
|
||||
},
|
||||
{
|
||||
'info_dict': {
|
||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||
'ext': 'mp4',
|
||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||
'timestamp': 1386878820,
|
||||
'upload_date': '20131212',
|
||||
},
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'playlist_items': '2-3',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
|
||||
'md5': '94130c1ca07ba0adb6088350681f16c1',
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'info_dict': {
|
||||
'id': '042e560ba94823d43afcb12ddf7142ca',
|
||||
'ext': 'mp4',
|
||||
'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
|
||||
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
|
||||
'timestamp': 1412953920,
|
||||
'upload_date': '20141010',
|
||||
'uploader_id': 'chris_snyder@pcmag.com',
|
||||
}
|
||||
'id': '53ee806780a81ec46e0790f8',
|
||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# videoId pattern
|
||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# IMDB embed
|
||||
'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Facebook embed
|
||||
'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Brightcove embed
|
||||
'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
article = self._call_api(display_id)
|
||||
|
||||
def entries():
|
||||
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
|
||||
if media_url:
|
||||
yield self.url_result(media_url, IGNIE.ie_key())
|
||||
for content in (article.get('content') or []):
|
||||
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
||||
yield self.url_result(video_url)
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), article.get('articleId'),
|
||||
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
|
||||
|
@@ -3,10 +3,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
@@ -23,7 +26,7 @@ class KakaoIE(InfoExtractor):
|
||||
'id': '301965083',
|
||||
'ext': 'mp4',
|
||||
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||
'uploader_id': 2671005,
|
||||
'uploader_id': '2671005',
|
||||
'uploader': '그랑그랑이',
|
||||
'timestamp': 1488160199,
|
||||
'upload_date': '20170227',
|
||||
@@ -36,11 +39,15 @@ class KakaoIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'uploader_id': 2653210,
|
||||
'uploader_id': '2653210',
|
||||
'uploader': '쇼! 음악중심',
|
||||
'timestamp': 1485684628,
|
||||
'upload_date': '20170129',
|
||||
}
|
||||
}, {
|
||||
# geo restricted
|
||||
'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -68,8 +75,7 @@ class KakaoIE(InfoExtractor):
|
||||
'fields': ','.join([
|
||||
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
||||
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
|
||||
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
|
||||
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
|
||||
}
|
||||
|
||||
@@ -82,24 +88,28 @@ class KakaoIE(InfoExtractor):
|
||||
|
||||
title = clip.get('title') or clip_link.get('displayTitle')
|
||||
|
||||
query['tid'] = impress.get('tid', '')
|
||||
query.update({
|
||||
'fields': '-*,code,message,url',
|
||||
'tid': impress.get('tid') or '',
|
||||
})
|
||||
|
||||
formats = []
|
||||
for fmt in clip.get('videoOutputList', []):
|
||||
for fmt in (clip.get('videoOutputList') or []):
|
||||
try:
|
||||
profile_name = fmt['profile']
|
||||
if profile_name == 'AUDIO':
|
||||
continue
|
||||
query.update({
|
||||
'profile': profile_name,
|
||||
'fields': '-*,url',
|
||||
})
|
||||
fmt_url_json = self._download_json(
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query=query, headers=player_header, fatal=False)
|
||||
|
||||
if fmt_url_json is None:
|
||||
query['profile'] = profile_name
|
||||
try:
|
||||
fmt_url_json = self._download_json(
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query=query, headers=player_header)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
resp = self._parse_json(e.cause.read().decode(), video_id)
|
||||
if resp.get('code') == 'GeoBlocked':
|
||||
self.raise_geo_restricted()
|
||||
continue
|
||||
|
||||
fmt_url = fmt_url_json['url']
|
||||
@@ -116,27 +126,13 @@ class KakaoIE(InfoExtractor):
|
||||
pass
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbs = []
|
||||
for thumb in clip.get('clipChapterThumbnailList', []):
|
||||
thumbs.append({
|
||||
'url': thumb.get('thumbnailUrl'),
|
||||
'id': compat_str(thumb.get('timeInSec')),
|
||||
'preference': -1 if thumb.get('isDefault') else 0
|
||||
})
|
||||
top_thumbnail = clip.get('thumbnailUrl')
|
||||
if top_thumbnail:
|
||||
thumbs.append({
|
||||
'url': top_thumbnail,
|
||||
'preference': 10,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
'title': title,
|
||||
'description': strip_or_none(clip.get('description')),
|
||||
'uploader': clip_link.get('channel', {}).get('name'),
|
||||
'uploader_id': clip_link.get('channelId'),
|
||||
'thumbnails': thumbs,
|
||||
'uploader': try_get(clip_link, lambda x: x['channel']['name']),
|
||||
'uploader_id': str_or_none(clip_link.get('channelId')),
|
||||
'thumbnail': clip.get('thumbnailUrl'),
|
||||
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||
'duration': int_or_none(clip.get('duration')),
|
||||
'view_count': int_or_none(clip.get('playCount')),
|
||||
|
@@ -2,268 +2,113 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .gigya import GigyaBaseIE
|
||||
|
||||
from ..compat import compat_str
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class MedialaanIE(GigyaBaseIE):
|
||||
class MedialaanIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.|nieuws\.)?
|
||||
(?:
|
||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
||||
(?:
|
||||
video(?:/[^/]+/id/|/?\?.*?\baid=)|
|
||||
(?:[^/]+/)*
|
||||
)
|
||||
(?:embed\.)?mychannels.video/embed/|
|
||||
embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
|
||||
(?:www\.)?(?:
|
||||
(?:
|
||||
7sur7|
|
||||
demorgen|
|
||||
hln|
|
||||
joe|
|
||||
qmusic
|
||||
)\.be|
|
||||
(?:
|
||||
[abe]d|
|
||||
bndestem|
|
||||
destentor|
|
||||
gelderlander|
|
||||
pzc|
|
||||
tubantia|
|
||||
volkskrant
|
||||
)\.nl
|
||||
)/video/(?:[^/]+/)*[^/?&#]+~p
|
||||
)
|
||||
(?P<id>[^/?#&]+)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_NETRC_MACHINE = 'medialaan'
|
||||
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
|
||||
_SITE_TO_APP_ID = {
|
||||
'vtm': 'vtm_watch',
|
||||
'q2': 'q2',
|
||||
'vtmkzoom': 'vtmkzoom',
|
||||
}
|
||||
_TESTS = [{
|
||||
# vod
|
||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
|
||||
'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
|
||||
'info_dict': {
|
||||
'id': 'vtm_20170219_VM0678361_vtmwatch',
|
||||
'id': '193993',
|
||||
'ext': 'mp4',
|
||||
'title': 'Allemaal Chris afl. 6',
|
||||
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
|
||||
'timestamp': 1487533280,
|
||||
'upload_date': '20170219',
|
||||
'duration': 2562,
|
||||
'series': 'Allemaal Chris',
|
||||
'season': 'Allemaal Chris',
|
||||
'season_number': 1,
|
||||
'season_id': '256936078124527',
|
||||
'episode': 'Allemaal Chris afl. 6',
|
||||
'episode_number': 6,
|
||||
'episode_id': '256936078591527',
|
||||
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
|
||||
'timestamp': 1611663540,
|
||||
'upload_date': '20210126',
|
||||
'duration': 238,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://vtm.be/video?aid=168332',
|
||||
'info_dict': {
|
||||
'id': '168332',
|
||||
'ext': 'mp4',
|
||||
'title': '"Veronique liegt!"',
|
||||
'description': 'md5:1385e2b743923afe54ba4adc38476155',
|
||||
'timestamp': 1489002029,
|
||||
'upload_date': '20170308',
|
||||
'duration': 96,
|
||||
},
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
|
||||
'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://vtm.be/video?aid=163157',
|
||||
'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# vod
|
||||
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
|
||||
'url': 'https://embed.mychannels.video/script/production/193993',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
||||
'url': 'https://embed.mychannels.video/production/193993',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# http/s redirect
|
||||
'url': 'https://vtmkzoom.be/video?aid=45724',
|
||||
'info_dict': {
|
||||
'id': '257136373657000',
|
||||
'ext': 'mp4',
|
||||
'title': 'K3 Dansstudio Ushuaia afl.6',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires account credentials',
|
||||
'url': 'https://mychannels.video/embed/193993',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# nieuws.vtm.be
|
||||
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
||||
'url': 'https://embed.mychannels.video/embed/193993',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
self.raise_login_required()
|
||||
|
||||
auth_data = {
|
||||
'APIKey': self._APIKEY,
|
||||
'sdk': 'js_6.1',
|
||||
'format': 'json',
|
||||
'loginID': username,
|
||||
'password': password,
|
||||
}
|
||||
|
||||
auth_info = self._gigya_login(auth_data)
|
||||
|
||||
self._uid = auth_info['UID']
|
||||
self._uid_signature = auth_info['UIDSignature']
|
||||
self._signature_timestamp = auth_info['signatureTimestamp']
|
||||
|
||||
self._logged_in = True
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
entries = []
|
||||
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
|
||||
mychannels_id = extract_attributes(element).get('data-mychannels-id')
|
||||
if mychannels_id:
|
||||
entries.append('https://mychannels.video/embed/' + mychannels_id)
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, site_id = mobj.group('id', 'site_id')
|
||||
production_id = self._match_id(url)
|
||||
production = self._download_json(
|
||||
'https://embed.mychannels.video/sdk/production/' + production_id,
|
||||
production_id, query={'options': 'UUUU_default'})['productions'][0]
|
||||
title = production['title']
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
|
||||
webpage, 'config', default='{}'), video_id,
|
||||
transform_source=lambda s: s.replace(
|
||||
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
|
||||
|
||||
vod_id = config.get('vodId') or self._search_regex(
|
||||
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
||||
r'"vodId"\s*:\s*"(.+?)"',
|
||||
r'<[^>]+id=["\']vod-(\d+)'),
|
||||
webpage, 'video_id', default=None)
|
||||
|
||||
# clip, no authentication required
|
||||
if not vod_id:
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
|
||||
default=''),
|
||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
||||
if player:
|
||||
video = player[-1]
|
||||
if video['videoUrl'] in ('http', 'https'):
|
||||
return self.url_result(video['url'], MedialaanIE.ie_key())
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video['videoUrl'],
|
||||
'title': video['title'],
|
||||
'thumbnail': video.get('imageUrl'),
|
||||
'timestamp': int_or_none(video.get('createdDate')),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
}
|
||||
formats = []
|
||||
for source in (production.get('sources') or []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, production_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
info = self._parse_html5_media_entries(
|
||||
url, webpage, video_id, m3u8_id='hls')[0]
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': self._html_search_meta('description', webpage),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'url': src,
|
||||
})
|
||||
# vod, authentication required
|
||||
else:
|
||||
if not self._logged_in:
|
||||
self._login()
|
||||
self._sort_formats(formats)
|
||||
|
||||
settings = self._parse_json(
|
||||
self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings', default='{}'),
|
||||
video_id)
|
||||
|
||||
def get(container, item):
|
||||
return try_get(
|
||||
settings, lambda x: x[container][item],
|
||||
compat_str) or self._search_regex(
|
||||
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
|
||||
default=None)
|
||||
|
||||
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
|
||||
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
|
||||
|
||||
data = self._download_json(
|
||||
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
|
||||
video_id, query={
|
||||
'app_id': app_id,
|
||||
'user_network': sso,
|
||||
'UID': self._uid,
|
||||
'UIDSignature': self._uid_signature,
|
||||
'signatureTimestamp': self._signature_timestamp,
|
||||
})
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['response']['uri'], video_id, entry_protocol='m3u8_native',
|
||||
ext='mp4', m3u8_id='hls')
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info = {
|
||||
'id': vod_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
api_key = get('vod', 'apiKey')
|
||||
channel = get('medialaanGigya', 'channel')
|
||||
|
||||
if api_key:
|
||||
videos = self._download_json(
|
||||
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
|
||||
query={
|
||||
'channels': channel,
|
||||
'ids': vod_id,
|
||||
'limit': 1,
|
||||
'apikey': api_key,
|
||||
})
|
||||
if videos:
|
||||
video = try_get(
|
||||
videos, lambda x: x['response']['videos'][0], dict)
|
||||
if video:
|
||||
def get(container, item, expected_type=None):
|
||||
return try_get(
|
||||
video, lambda x: x[container][item], expected_type)
|
||||
|
||||
def get_string(container, item):
|
||||
return get(container, item, compat_str)
|
||||
|
||||
info.update({
|
||||
'series': get_string('program', 'title'),
|
||||
'season': get_string('season', 'title'),
|
||||
'season_number': int_or_none(get('season', 'number')),
|
||||
'season_id': get_string('season', 'id'),
|
||||
'episode': get_string('episode', 'title'),
|
||||
'episode_number': int_or_none(get('episode', 'number')),
|
||||
'episode_id': get_string('episode', 'id'),
|
||||
'duration': int_or_none(
|
||||
video.get('duration')) or int_or_none(
|
||||
video.get('durationMillis'), scale=1000),
|
||||
'title': get_string('episode', 'title'),
|
||||
'description': get_string('episode', 'text'),
|
||||
'timestamp': unified_timestamp(get_string(
|
||||
'publication', 'begin')),
|
||||
})
|
||||
|
||||
if not info.get('title'):
|
||||
info['title'] = try_get(
|
||||
config, lambda x: x['videoConfig']['title'],
|
||||
compat_str) or self._html_search_regex(
|
||||
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
|
||||
if not info.get('description'):
|
||||
info['description'] = self._html_search_regex(
|
||||
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
return info
|
||||
return {
|
||||
'id': production_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': production.get('posterUrl'),
|
||||
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
|
||||
'duration': int_or_none(production.get('duration')) or None,
|
||||
}
|
||||
|
@@ -2,10 +2,11 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -14,7 +15,7 @@ class NineGagIE(InfoExtractor):
|
||||
IE_NAME = '9gag'
|
||||
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||
'info_dict': {
|
||||
'id': 'ae5Ag7B',
|
||||
@@ -29,7 +30,11 @@ class NineGagIE(InfoExtractor):
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# HTML escaped title
|
||||
'url': 'https://9gag.com/gag/av5nvyb',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
@@ -43,7 +48,7 @@ class NineGagIE(InfoExtractor):
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
title = post['title']
|
||||
title = unescapeHTML(post['title'])
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
|
@@ -413,7 +413,8 @@ class PeerTubeIE(InfoExtractor):
|
||||
peertube3\.cpy\.re|
|
||||
peertube2\.cpy\.re|
|
||||
videos\.tcit\.fr|
|
||||
peertube\.cpy\.re
|
||||
peertube\.cpy\.re|
|
||||
canard\.tube
|
||||
)'''
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
|
||||
|
@@ -22,11 +22,15 @@ from ..utils import (
|
||||
orderedSet,
|
||||
remove_quotes,
|
||||
str_to_int,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PornHubBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'pornhub'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
def dl(*args, **kwargs):
|
||||
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
@@ -52,6 +56,66 @@ class PornHubBaseIE(InfoExtractor):
|
||||
|
||||
return webpage, urlh
|
||||
|
||||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _login(self, host):
|
||||
if self._logged_in:
|
||||
return
|
||||
|
||||
site = host.split('.')[0]
|
||||
|
||||
# Both sites pornhub and pornhubpremium have separate accounts
|
||||
# so there should be an option to provide credentials for both.
|
||||
# At the same time some videos are available under the same video id
|
||||
# on both sites so that we have to identify them as the same video.
|
||||
# For that purpose we have to keep both in the same extractor
|
||||
# but under different netrc machines.
|
||||
username, password = self._get_login_info(netrc_machine=site)
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '')
|
||||
login_page = self._download_webpage(
|
||||
login_url, None, 'Downloading %s login page' % site)
|
||||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'class=["\']signOut',
|
||||
r'>Sign\s+[Oo]ut\s*<'))
|
||||
|
||||
if is_logged(login_page):
|
||||
self._logged_in = True
|
||||
return
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.%s/front/authenticate' % host, None,
|
||||
'Logging in to %s' % site,
|
||||
data=urlencode_postdata(login_form),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Referer': login_url,
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
|
||||
if response.get('success') == '1':
|
||||
self._logged_in = True
|
||||
return
|
||||
|
||||
message = response.get('message')
|
||||
if message is not None:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % message, expected=True)
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class PornHubIE(PornHubBaseIE):
|
||||
IE_DESC = 'PornHub and Thumbzilla'
|
||||
@@ -163,12 +227,20 @@ class PornHubIE(PornHubBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Some videos are available with the same id on both premium
|
||||
# and non-premium sites (e.g. this and the following test)
|
||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)',
|
||||
webpage)
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
@@ -180,12 +252,7 @@ class PornHubIE(PornHubBaseIE):
|
||||
host = mobj.group('host') or 'pornhub.com'
|
||||
video_id = mobj.group('id')
|
||||
|
||||
if 'premium' in host:
|
||||
if not self._downloader.params.get('cookiefile'):
|
||||
raise ExtractorError(
|
||||
'PornHub Premium requires authentication.'
|
||||
' You may want to use --cookies.',
|
||||
expected=True)
|
||||
self._login(host)
|
||||
|
||||
self._set_cookie(host, 'age_verified', '1')
|
||||
|
||||
@@ -405,6 +472,10 @@ class PornHubIE(PornHubBaseIE):
|
||||
|
||||
|
||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
def _extract_page(self, url):
|
||||
return int_or_none(self._search_regex(
|
||||
r'\bpage=(\d+)', url, 'page', default=None))
|
||||
|
||||
def _extract_entries(self, webpage, host):
|
||||
# Only process container div with main playlist content skipping
|
||||
# drop-down menu that uses similar pattern for videos (see
|
||||
@@ -422,26 +493,6 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||
container))
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
playlist_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = self._extract_entries(webpage, host)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
|
||||
'playlist', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
title = playlist.get('title') or self._search_regex(
|
||||
r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, title, playlist.get('description'))
|
||||
|
||||
|
||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||
@@ -463,14 +514,27 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Unavailable via /videos page, but available with direct pagination
|
||||
# on pornstar page (see [1]), requires premium
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Same as before, multi page
|
||||
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user_id = mobj.group('id')
|
||||
videos_url = '%s/videos' % mobj.group('url')
|
||||
page = self._extract_page(url)
|
||||
if page:
|
||||
videos_url = update_url_query(videos_url, {'page': page})
|
||||
return self.url_result(
|
||||
'%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
|
||||
video_id=user_id)
|
||||
videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id)
|
||||
|
||||
|
||||
class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
@@ -483,32 +547,55 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||
<button[^>]+\bid=["\']moreDataBtn
|
||||
''', webpage) is not None
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
item_id = mobj.group('id')
|
||||
def _entries(self, url, host, item_id):
|
||||
page = self._extract_page(url)
|
||||
|
||||
page = int_or_none(self._search_regex(
|
||||
r'\bpage=(\d+)', url, 'page', default=None))
|
||||
VIDEOS = '/videos'
|
||||
|
||||
entries = []
|
||||
for page_num in (page, ) if page is not None else itertools.count(1):
|
||||
def download_page(base_url, num, fallback=False):
|
||||
note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '')
|
||||
return self._download_webpage(
|
||||
base_url, item_id, note, query={'page': num})
|
||||
|
||||
def is_404(e):
|
||||
return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
|
||||
|
||||
base_url = url
|
||||
has_page = page is not None
|
||||
first_page = page if has_page else 1
|
||||
for page_num in (first_page, ) if has_page else itertools.count(first_page):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
url, item_id, 'Downloading page %d' % page_num,
|
||||
query={'page': page_num})
|
||||
try:
|
||||
webpage = download_page(base_url, page_num)
|
||||
except ExtractorError as e:
|
||||
# Some sources may not be available via /videos page,
|
||||
# trying to fallback to main page pagination (see [1])
|
||||
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||
if is_404(e) and page_num == first_page and VIDEOS in base_url:
|
||||
base_url = base_url.replace(VIDEOS, '')
|
||||
webpage = download_page(base_url, page_num, fallback=True)
|
||||
else:
|
||||
raise
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
if is_404(e) and page_num != first_page:
|
||||
break
|
||||
raise
|
||||
page_entries = self._extract_entries(webpage, host)
|
||||
if not page_entries:
|
||||
break
|
||||
entries.extend(page_entries)
|
||||
for e in page_entries:
|
||||
yield e
|
||||
if not self._has_more(webpage):
|
||||
break
|
||||
|
||||
return self.playlist_result(orderedSet(entries), item_id)
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
item_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
|
||||
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
||||
|
||||
|
||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||
|
100
youtube_dl/extractor/samplefocus.py
Normal file
100
youtube_dl/extractor/samplefocus.py
Normal file
@@ -0,0 +1,100 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class SampleFocusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar',
|
||||
'md5': '48c8d62d60be467293912e0e619a5120',
|
||||
'info_dict': {
|
||||
'id': '40316',
|
||||
'display_id': 'lil-peep-sad-emo-guitar',
|
||||
'ext': 'mp3',
|
||||
'title': 'Lil Peep Sad Emo Guitar',
|
||||
'thumbnail': r're:^https?://.+\.png',
|
||||
'license': 'Standard License',
|
||||
'uploader': 'CapsCtrl',
|
||||
'uploader_id': 'capsctrl',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'categories': ['Samples', 'Guitar', 'Electric guitar'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://samplefocus.com/samples/dababy-style-bass-808',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://samplefocus.com/samples/young-chop-kick',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
sample_id = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
|
||||
webpage, 'sample id', group='id')
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False) or self._html_search_regex(
|
||||
r'<h1>(.+?)</h1>', webpage, 'title')
|
||||
|
||||
mp3_url = self._search_regex(
|
||||
r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)',
|
||||
webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex(
|
||||
r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>',
|
||||
webpage, 'mp3 url', group=0))['content']
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex(
|
||||
r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)',
|
||||
webpage, 'mp3', fatal=False, group='url')
|
||||
|
||||
comments = []
|
||||
for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage):
|
||||
comments.append({
|
||||
'author': author,
|
||||
'author_id': author_id,
|
||||
'text': body,
|
||||
})
|
||||
|
||||
uploader_id = uploader = None
|
||||
mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
|
||||
breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage)
|
||||
categories = []
|
||||
if breadcrumb:
|
||||
for _, name in re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb):
|
||||
categories.append(name)
|
||||
|
||||
def extract_count(klass):
|
||||
return int_or_none(self._html_search_regex(
|
||||
r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass,
|
||||
webpage, klass, fatal=False))
|
||||
|
||||
return {
|
||||
'id': sample_id,
|
||||
'title': title,
|
||||
'url': mp3_url,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'license': self._html_search_regex(
|
||||
r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<',
|
||||
webpage, 'license', fatal=False, group='license'),
|
||||
'uploader_id': uploader_id,
|
||||
'like_count': extract_count('sample-%s-favorites' % sample_id),
|
||||
'comment_count': extract_count('comments'),
|
||||
'comments': comments,
|
||||
'categories': categories,
|
||||
}
|
160
youtube_dl/extractor/simplecast.py
Normal file
160
youtube_dl/extractor/simplecast.py
Normal file
@@ -0,0 +1,160 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class SimplecastBaseIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_API_BASE = 'https://api.simplecast.com/'
|
||||
|
||||
def _call_api(self, path_tmpl, video_id):
|
||||
return self._download_json(
|
||||
self._API_BASE + path_tmpl % video_id, video_id)
|
||||
|
||||
def _call_search_api(self, resource, resource_id, resource_url):
|
||||
return self._download_json(
|
||||
'https://api.simplecast.com/%ss/search' % resource, resource_id,
|
||||
data=urlencode_postdata({'url': resource_url}))
|
||||
|
||||
def _parse_episode(self, episode):
|
||||
episode_id = episode['id']
|
||||
title = episode['title'].strip()
|
||||
audio_file = episode.get('audio_file') or {}
|
||||
audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']
|
||||
|
||||
season = episode.get('season') or {}
|
||||
season_href = season.get('href')
|
||||
season_id = None
|
||||
if season_href:
|
||||
season_id = self._search_regex(
|
||||
r'https?://api.simplecast.com/seasons/(%s)' % self._UUID_REGEX,
|
||||
season_href, 'season id', default=None)
|
||||
|
||||
webpage_url = episode.get('episode_url')
|
||||
channel_url = None
|
||||
if webpage_url:
|
||||
channel_url = self._search_regex(
|
||||
r'(https?://[^/]+\.simplecast\.com)',
|
||||
webpage_url, 'channel url', default=None)
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'display_id': episode.get('slug'),
|
||||
'title': title,
|
||||
'url': clean_podcast_url(audio_file_url),
|
||||
'webpage_url': webpage_url,
|
||||
'channel_url': channel_url,
|
||||
'series': try_get(episode, lambda x: x['podcast']['title']),
|
||||
'season_number': int_or_none(season.get('number')),
|
||||
'season_id': season_id,
|
||||
'thumbnail': episode.get('image_url'),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode.get('number')),
|
||||
'description': strip_or_none(episode.get('description')),
|
||||
'timestamp': parse_iso8601(episode.get('published_at')),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
|
||||
}
|
||||
|
||||
|
||||
class SimplecastIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast'
|
||||
_VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
|
||||
_COMMON_TEST_INFO = {
|
||||
'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'ext': 'mp3',
|
||||
'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
|
||||
'episode_number': 1,
|
||||
'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
|
||||
'season_number': 1,
|
||||
'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
|
||||
'series': 'The RE:BIND.io Podcast',
|
||||
'duration': 5343,
|
||||
'timestamp': 1580979475,
|
||||
'upload_date': '20200206',
|
||||
'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'md5': '8c93be7be54251bf29ee97464eabe61c',
|
||||
'info_dict': _COMMON_TEST_INFO,
|
||||
}, {
|
||||
'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'''(?x)<iframe[^>]+src=["\']
|
||||
(
|
||||
https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
|
||||
player\.simplecast\.com/%s
|
||||
))''' % SimplecastBaseIE._UUID_REGEX, webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
episode = self._call_api('episodes/%s', episode_id)
|
||||
return self._parse_episode(episode)
|
||||
|
||||
|
||||
class SimplecastEpisodeIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast:episode'
|
||||
_VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
|
||||
'md5': '8c93be7be54251bf29ee97464eabe61c',
|
||||
'info_dict': SimplecastIE._COMMON_TEST_INFO,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
episode = self._call_search_api(
|
||||
'episode', mobj.group(1), mobj.group(0))
|
||||
return self._parse_episode(episode)
|
||||
|
||||
|
||||
class SimplecastPodcastIE(SimplecastBaseIE):
|
||||
IE_NAME = 'simplecast:podcast'
|
||||
_VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com',
|
||||
'playlist_mincount': 33,
|
||||
'info_dict': {
|
||||
'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
|
||||
'title': 'The RE:BIND.io Podcast',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
subdomain = self._match_id(url)
|
||||
site = self._call_search_api('site', subdomain, url)
|
||||
podcast = site['podcast']
|
||||
podcast_id = podcast['id']
|
||||
podcast_title = podcast.get('title')
|
||||
|
||||
def entries():
|
||||
episodes = self._call_api('podcasts/%s/episodes', podcast_id)
|
||||
for episode in (episodes.get('collection') or []):
|
||||
info = self._parse_episode(episode)
|
||||
info['series'] = podcast_title
|
||||
yield info
|
||||
|
||||
return self.playlist_result(entries(), podcast_id, podcast_title)
|
151
youtube_dl/extractor/storyfire.py
Normal file
151
youtube_dl/extractor/storyfire.py
Normal file
@@ -0,0 +1,151 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
# HEADRequest,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
smuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class StoryFireBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
|
||||
|
||||
def _call_api(self, path, video_id, resource, query=None):
|
||||
return self._download_json(
|
||||
'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query)
|
||||
|
||||
def _parse_video(self, video):
|
||||
title = video['title']
|
||||
vimeo_id = self._search_regex(
|
||||
r'https?://player\.vimeo\.com/external/(\d+)',
|
||||
video['vimeoVideoURL'], 'vimeo id')
|
||||
|
||||
# video_url = self._request_webpage(
|
||||
# HEADRequest(video['vimeoVideoURL']), video_id).geturl()
|
||||
# formats = []
|
||||
# for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
|
||||
# formats.extend(self._extract_m3u8_formats(
|
||||
# v_url, video_id, 'mp4', 'm3u8_native',
|
||||
# m3u8_id='hls' + suffix, fatal=False))
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# v_url.replace('.m3u8', '.mpd'), video_id,
|
||||
# mpd_id='dash' + suffix, fatal=False))
|
||||
# self._sort_formats(formats)
|
||||
|
||||
uploader_id = video.get('hostID')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': vimeo_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'url': smuggle_url(
|
||||
'https://player.vimeo.com/video/' + vimeo_id, {
|
||||
'http_headers': {
|
||||
'Referer': 'https://storyfire.com/',
|
||||
}
|
||||
}),
|
||||
# 'formats': formats,
|
||||
'thumbnail': video.get('storyImage'),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'like_count': int_or_none(video.get('likesCount')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
'duration': int_or_none(video.get('videoDuration')),
|
||||
'timestamp': int_or_none(video.get('publishDate')),
|
||||
'uploader': video.get('username'),
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None,
|
||||
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
|
||||
}
|
||||
|
||||
|
||||
class StoryFireIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
|
||||
_TEST = {
|
||||
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
||||
'md5': 'caec54b9e4621186d6079c7ec100c1eb',
|
||||
'info_dict': {
|
||||
'id': '378954662',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buzzfeed Teaches You About Memes',
|
||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
||||
'timestamp': 1576129028,
|
||||
'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
|
||||
'uploader': 'whang!',
|
||||
'upload_date': '20191212',
|
||||
'duration': 418,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata']
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video = self._call_api(
|
||||
'generic/video-detail', video_id, 'video')['video']
|
||||
return self._parse_video(video)
|
||||
|
||||
|
||||
class StoryFireUserIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
|
||||
_TEST = {
|
||||
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
||||
'info_dict': {
|
||||
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
||||
},
|
||||
'playlist_mincount': 151,
|
||||
}
|
||||
_PAGE_SIZE = 20
|
||||
|
||||
def _fetch_page(self, user_id, page):
|
||||
videos = self._call_api(
|
||||
'publicVideos', user_id, 'page %d' % (page + 1), {
|
||||
'skip': page * self._PAGE_SIZE,
|
||||
})['videos']
|
||||
for video in videos:
|
||||
yield self._parse_video(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, user_id), self._PAGE_SIZE)
|
||||
return self.playlist_result(entries, user_id)
|
||||
|
||||
|
||||
class StoryFireSeriesIE(StoryFireBaseIE):
|
||||
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
||||
'info_dict': {
|
||||
'id': '-Lq6MsuIHLODO6d2dDkr',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}, {
|
||||
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
||||
'info_dict': {
|
||||
'id': 'the_mortal_one',
|
||||
},
|
||||
'playlist_count': 0,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, stories):
|
||||
for story in stories.values():
|
||||
if story.get('hasVideo'):
|
||||
yield self._parse_video(story)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
stories = self._call_api(
|
||||
'seriesStories', series_id, 'series stories')
|
||||
return self.playlist_result(self._extract_videos(stories), series_id)
|
@@ -255,8 +255,10 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
svt_id = self._search_regex(
|
||||
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
||||
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
|
||||
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',
|
||||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)'),
|
||||
r'["\']svtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
|
||||
r'["\']svtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)'),
|
||||
webpage, 'video id')
|
||||
|
||||
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||
|
@@ -20,7 +20,7 @@ from ..utils import (
|
||||
|
||||
class TV2IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tv2.no/v/916509/',
|
||||
'info_dict': {
|
||||
'id': '916509',
|
||||
@@ -33,7 +33,7 @@ class TV2IE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
},
|
||||
}
|
||||
}]
|
||||
_API_DOMAIN = 'sumo.tv2.no'
|
||||
_PROTOCOLS = ('HDS', 'HLS', 'DASH')
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
@@ -42,6 +42,12 @@ class TV2IE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
|
||||
|
||||
asset = self._download_json(
|
||||
api_base + '.json', video_id,
|
||||
'Downloading metadata JSON')['asset']
|
||||
title = asset.get('subtitle') or asset['title']
|
||||
is_live = asset.get('live') is True
|
||||
|
||||
formats = []
|
||||
format_urls = []
|
||||
for protocol in self._PROTOCOLS:
|
||||
@@ -81,7 +87,8 @@ class TV2IE(InfoExtractor):
|
||||
elif ext == 'm3u8':
|
||||
if not data.get('drmProtected'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
video_url, video_id, 'mp4',
|
||||
'm3u8' if is_live else 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -99,11 +106,6 @@ class TV2IE(InfoExtractor):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
self._sort_formats(formats)
|
||||
|
||||
asset = self._download_json(
|
||||
api_base + '.json', video_id,
|
||||
'Downloading metadata JSON')['asset']
|
||||
title = asset['title']
|
||||
|
||||
thumbnails = [{
|
||||
'id': thumbnail.get('@type'),
|
||||
'url': thumbnail.get('url'),
|
||||
@@ -112,7 +114,7 @@ class TV2IE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': strip_or_none(asset.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': parse_iso8601(asset.get('createTime')),
|
||||
@@ -120,6 +122,7 @@ class TV2IE(InfoExtractor):
|
||||
'view_count': int_or_none(asset.get('views')),
|
||||
'categories': asset.get('keywords', '').split(','),
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
@@ -168,13 +171,13 @@ class TV2ArticleIE(InfoExtractor):
|
||||
|
||||
|
||||
class KatsomoIE(TV2IE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv(uutiset)?)\.fi/(?:sarja/[0-9a-z-]+-\d+/[0-9a-z-]+-|(?:#!/)?jakso/(?:\d+/[^/]+/)?|video/prog)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
|
||||
'info_dict': {
|
||||
'id': '1181321',
|
||||
'ext': 'mp4',
|
||||
'title': 'MTV Uutiset Live',
|
||||
'title': 'Lahden Pelicans teki kovan ratkaisun – Ville Nieminen pihalle',
|
||||
'description': 'Päätöksen teki Pelicansin hallitus.',
|
||||
'timestamp': 1575116484,
|
||||
'upload_date': '20191130',
|
||||
@@ -186,7 +189,60 @@ class KatsomoIE(TV2IE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.katsomo.fi/#!/jakso/33001005/studio55-fi/658521/jukka-kuoppamaki-tekee-yha-lauluja-vaikka-lentokoneessa',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mtvuutiset.fi/video/prog1311159',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.katsomo.fi/#!/jakso/1311159',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_DOMAIN = 'api.katsomo.fi'
|
||||
_PROTOCOLS = ('HLS', 'MPD')
|
||||
_GEO_COUNTRIES = ['FI']
|
||||
|
||||
|
||||
class MTVUutisetArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)mtvuutiset\.fi/artikkeli/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mtvuutiset.fi/artikkeli/tallaisia-vaurioita-viking-amorellassa-on-useamman-osaston-alla-vetta/7931384',
|
||||
'info_dict': {
|
||||
'id': '1311159',
|
||||
'ext': 'mp4',
|
||||
'title': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
|
||||
'description': 'Viking Amorellan matkustajien evakuointi on alkanut – tältä operaatio näyttää laivalla',
|
||||
'timestamp': 1600608966,
|
||||
'upload_date': '20200920',
|
||||
'duration': 153.7886666,
|
||||
'view_count': int,
|
||||
'categories': list,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# multiple Youtube embeds
|
||||
'url': 'https://www.mtvuutiset.fi/artikkeli/50-vuotta-subarun-vastaiskua/6070962',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
article = self._download_json(
|
||||
'http://api.mtvuutiset.fi/mtvuutiset/api/json/' + article_id,
|
||||
article_id)
|
||||
|
||||
def entries():
|
||||
for video in (article.get('videos') or []):
|
||||
video_type = video.get('videotype')
|
||||
video_url = video.get('url')
|
||||
if not (video_url and video_type in ('katsomo', 'youtube')):
|
||||
continue
|
||||
yield self.url_result(
|
||||
video_url, video_type.capitalize(), video.get('video_id'))
|
||||
|
||||
return self.playlist_result(
|
||||
entries(), article_id, article.get('title'), article.get('description'))
|
||||
|
@@ -17,7 +17,7 @@ class TV4IE(InfoExtractor):
|
||||
tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
|
||||
tv4play\.se/
|
||||
(?:
|
||||
(?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
|
||||
(?:program|barn)/(?:(?:[^/]+/){1,2}|(?:[^\?]+)\?video_id=)|
|
||||
iframe/video/|
|
||||
film/|
|
||||
sport/|
|
||||
@@ -65,6 +65,10 @@ class TV4IE(InfoExtractor):
|
||||
{
|
||||
'url': 'http://www.tv4play.se/program/farang/3922081',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
@@ -42,8 +42,8 @@ class URPlayIE(InfoExtractor):
|
||||
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
urplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"',
|
||||
webpage, 'urplayer data'), video_id)['currentProduct']
|
||||
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes'][0]
|
||||
episode = urplayer_data['title']
|
||||
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
||||
host = self._download_json(
|
||||
|
@@ -4,21 +4,22 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
qualities,
|
||||
random_birthday,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class VideoPressIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
|
||||
_ID_REGEX = r'[\da-zA-Z]{8}'
|
||||
_PATH_REGEX = r'video(?:\.word)?press\.com/embed/'
|
||||
_VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX)
|
||||
_TESTS = [{
|
||||
'url': 'https://videopress.com/embed/kUJmAcSf',
|
||||
'md5': '706956a6c875873d51010921310e4bc6',
|
||||
@@ -36,35 +37,36 @@ class VideoPressIE(InfoExtractor):
|
||||
# 17+, requires birth_* params
|
||||
'url': 'https://videopress.com/embed/iH3gstfZ',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.wordpress.com/embed/kUJmAcSf',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
|
||||
r'<iframe[^>]+src=["\']((?:https?://)?%s%s)' % (VideoPressIE._PATH_REGEX, VideoPressIE._ID_REGEX),
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
||||
query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width'
|
||||
video = self._download_json(
|
||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||
video_id, query=query)
|
||||
|
||||
title = video['title']
|
||||
|
||||
def base_url(scheme):
|
||||
return try_get(
|
||||
video, lambda x: x['file_url_base'][scheme], compat_str)
|
||||
|
||||
base_url = base_url('https') or base_url('http')
|
||||
file_url_base = video.get('file_url_base') or {}
|
||||
base_url = file_url_base.get('https') or file_url_base.get('http')
|
||||
|
||||
QUALITIES = ('std', 'dvd', 'hd')
|
||||
quality = qualities(QUALITIES)
|
||||
|
||||
formats = []
|
||||
for format_id, f in video['files'].items():
|
||||
for format_id, f in (video.get('files') or {}).items():
|
||||
if not isinstance(f, dict):
|
||||
continue
|
||||
for ext, path in f.items():
|
||||
@@ -75,12 +77,14 @@ class VideoPressIE(InfoExtractor):
|
||||
'ext': determine_ext(path, ext),
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
original_url = try_get(video, lambda x: x['original'], compat_str)
|
||||
original_url = video.get('original')
|
||||
if original_url:
|
||||
formats.append({
|
||||
'url': original_url,
|
||||
'format_id': 'original',
|
||||
'quality': len(QUALITIES),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -4,7 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class VidioIE(InfoExtractor):
|
||||
@@ -21,57 +27,63 @@ class VidioIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 149,
|
||||
'like_count': int,
|
||||
'uploader': 'TWELVE Pic',
|
||||
'timestamp': 1444902800,
|
||||
'upload_date': '20151015',
|
||||
'uploader_id': 'twelvepictures',
|
||||
'channel': 'Cover Music Video',
|
||||
'channel_id': '280236',
|
||||
'view_count': int,
|
||||
'dislike_count': int,
|
||||
'comment_count': int,
|
||||
'tags': 'count:4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
self._api_key = self._download_json(
|
||||
'https://www.vidio.com/auth', None, data=b'')['api_key']
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
data = self._download_json(
|
||||
'https://api.vidio.com/videos/' + video_id, display_id, headers={
|
||||
'Content-Type': 'application/vnd.api+json',
|
||||
'X-API-KEY': self._api_key,
|
||||
})
|
||||
video = data['videos'][0]
|
||||
title = video['title'].strip()
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
m3u8_url, duration, thumbnail = [None] * 3
|
||||
|
||||
clips = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'data-json-clips\s*=\s*(["\'])(?P<data>\[.+?\])\1',
|
||||
webpage, 'video data', default='[]', group='data'),
|
||||
display_id, fatal=False)
|
||||
if clips:
|
||||
clip = clips[0]
|
||||
m3u8_url = clip.get('sources', [{}])[0].get('file')
|
||||
duration = clip.get('clip_duration')
|
||||
thumbnail = clip.get('image')
|
||||
|
||||
m3u8_url = m3u8_url or self._search_regex(
|
||||
r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'hls url', group='url')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
|
||||
data['clips'][0]['hls_url'], display_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(duration or self._search_regex(
|
||||
r'data-video-duration=(["\'])(?P<duration>\d+)\1', webpage,
|
||||
'duration', fatal=False, group='duration'))
|
||||
thumbnail = thumbnail or self._og_search_thumbnail(webpage)
|
||||
|
||||
like_count = int_or_none(self._search_regex(
|
||||
(r'<span[^>]+data-comment-vote-count=["\'](\d+)',
|
||||
r'<span[^>]+class=["\'].*?\blike(?:__|-)count\b.*?["\'][^>]*>\s*(\d+)'),
|
||||
webpage, 'like count', fatal=False))
|
||||
get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
|
||||
channel = get_first('channel')
|
||||
user = get_first('user')
|
||||
username = user.get('username')
|
||||
get_count = lambda x: int_or_none(video.get('total_' + x))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'like_count': like_count,
|
||||
'description': strip_or_none(video.get('description')),
|
||||
'thumbnail': video.get('image_url_medium'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'like_count': get_count('likes'),
|
||||
'formats': formats,
|
||||
'uploader': user.get('name'),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'uploader_id': username,
|
||||
'uploader_url': 'https://www.vidio.com/@' + username if username else None,
|
||||
'channel': channel.get('name'),
|
||||
'channel_id': str_or_none(channel.get('id')),
|
||||
'view_count': get_count('view_count'),
|
||||
'dislike_count': get_count('dislikes'),
|
||||
'comment_count': get_count('comments'),
|
||||
'tags': video.get('tag_list'),
|
||||
}
|
||||
|
@@ -1,68 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
decode_packed_codes,
|
||||
js_to_json,
|
||||
NO_DEFAULT,
|
||||
PACKED_CODES_RE,
|
||||
)
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||
'info_dict': {
|
||||
'id': 'cghql9yq6emu',
|
||||
'ext': 'mp4',
|
||||
'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidzi.cc/cghql9yq6emu.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vidzi.si/rph9gztxj1et.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidzi.nu/cghql9yq6emu.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://vidzi.tv/%s' % video_id, video_id)
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
codes = [webpage]
|
||||
codes.extend([
|
||||
decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
|
||||
for mobj in re.finditer(PACKED_CODES_RE, webpage)])
|
||||
for num, code in enumerate(codes, 1):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'setup\(([^)]+)\)', code, 'jwplayer data',
|
||||
default=NO_DEFAULT if num == len(codes) else '{}'),
|
||||
video_id, transform_source=lambda s: js_to_json(
|
||||
re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
|
||||
if jwplayer_data:
|
||||
break
|
||||
|
||||
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
|
||||
info_dict['title'] = title
|
||||
|
||||
return info_dict
|
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +31,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
_API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
|
||||
|
||||
_APP = '100005a'
|
||||
_APP_VERSION = '2.2.5.1428709186'
|
||||
_APP_VERSION = '6.0.0'
|
||||
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
|
||||
|
||||
_GEO_BYPASS = False
|
||||
@@ -41,7 +42,7 @@ class VikiBaseIE(InfoExtractor):
|
||||
_ERRORS = {
|
||||
'geo': 'Sorry, this content is not available in your region.',
|
||||
'upcoming': 'Sorry, this content is not yet available.',
|
||||
# 'paywall': 'paywall',
|
||||
'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
|
||||
}
|
||||
|
||||
def _prepare_call(self, path, timestamp=None, post_data=None):
|
||||
@@ -62,7 +63,8 @@ class VikiBaseIE(InfoExtractor):
|
||||
|
||||
def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
|
||||
resp = self._download_json(
|
||||
self._prepare_call(path, timestamp, post_data), video_id, note)
|
||||
self._prepare_call(path, timestamp, post_data), video_id, note,
|
||||
headers={'x-viki-app-ver': self._APP_VERSION})
|
||||
|
||||
error = resp.get('error')
|
||||
if error:
|
||||
@@ -82,11 +84,13 @@ class VikiBaseIE(InfoExtractor):
|
||||
expected=True)
|
||||
|
||||
def _check_errors(self, data):
|
||||
for reason, status in data.get('blocking', {}).items():
|
||||
for reason, status in (data.get('blocking') or {}).items():
|
||||
if status and reason in self._ERRORS:
|
||||
message = self._ERRORS[reason]
|
||||
if reason == 'geo':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
elif reason == 'paywall':
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError('%s said: %s' % (
|
||||
self.IE_NAME, message), expected=True)
|
||||
|
||||
@@ -131,13 +135,19 @@ class VikiIE(VikiBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1023585v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heirs Episode 14',
|
||||
'uploader': 'SBS',
|
||||
'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
|
||||
'title': 'Heirs - Episode 14',
|
||||
'uploader': 'SBS Contents Hub',
|
||||
'timestamp': 1385047627,
|
||||
'upload_date': '20131121',
|
||||
'age_limit': 13,
|
||||
'duration': 3570,
|
||||
'episode_number': 14,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'Blocked in the US',
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
# clip
|
||||
'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
|
||||
@@ -153,7 +163,8 @@ class VikiIE(VikiBaseIE):
|
||||
'uploader': 'Arirang TV',
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
}
|
||||
},
|
||||
'skip': 'Sorry. There was an error loading this video',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
|
||||
'info_dict': {
|
||||
@@ -171,7 +182,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# episode
|
||||
'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
|
||||
'md5': '94e0e34fd58f169f40c184f232356cfe',
|
||||
'md5': '0a53dc252e6e690feccd756861495a8c',
|
||||
'info_dict': {
|
||||
'id': '44699v',
|
||||
'ext': 'mp4',
|
||||
@@ -183,6 +194,10 @@ class VikiIE(VikiBaseIE):
|
||||
'uploader': 'group8',
|
||||
'like_count': int,
|
||||
'age_limit': 13,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}, {
|
||||
@@ -209,7 +224,7 @@ class VikiIE(VikiBaseIE):
|
||||
}, {
|
||||
# non-English description
|
||||
'url': 'http://www.viki.com/videos/158036v-love-in-magic',
|
||||
'md5': 'adf9e321a0ae5d0aace349efaaff7691',
|
||||
'md5': '41faaba0de90483fb4848952af7c7d0d',
|
||||
'info_dict': {
|
||||
'id': '158036v',
|
||||
'ext': 'mp4',
|
||||
@@ -220,6 +235,10 @@ class VikiIE(VikiBaseIE):
|
||||
'title': 'Love In Magic',
|
||||
'age_limit': 13,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -229,36 +248,33 @@ class VikiIE(VikiBaseIE):
|
||||
'https://www.viki.com/api/videos/' + video_id,
|
||||
video_id, 'Downloading video JSON', headers={
|
||||
'x-client-user-agent': std_headers['User-Agent'],
|
||||
'x-viki-app-ver': '4.0.57',
|
||||
'x-viki-app-ver': '3.0.0',
|
||||
})
|
||||
video = resp['video']
|
||||
|
||||
self._check_errors(video)
|
||||
|
||||
title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
|
||||
episode_number = int_or_none(video.get('number'))
|
||||
if not title:
|
||||
title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = video.get('container', {}).get('titles', {})
|
||||
title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
|
||||
container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
|
||||
container_title = self.dict_selection(container_titles, 'en')
|
||||
title = '%s - %s' % (container_title, title)
|
||||
|
||||
description = self.dict_selection(video.get('descriptions', {}), 'en')
|
||||
|
||||
duration = int_or_none(video.get('duration'))
|
||||
timestamp = parse_iso8601(video.get('created_at'))
|
||||
uploader = video.get('author')
|
||||
like_count = int_or_none(video.get('likes', {}).get('count'))
|
||||
age_limit = parse_age_limit(video.get('rating'))
|
||||
like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_id, thumbnail in video.get('images', {}).items():
|
||||
for thumbnail_id, thumbnail in (video.get('images') or {}).items():
|
||||
thumbnails.append({
|
||||
'id': thumbnail_id,
|
||||
'url': thumbnail.get('url'),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
|
||||
for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
|
||||
subtitles[subtitle_lang] = [{
|
||||
'ext': subtitles_format,
|
||||
'url': self._prepare_call(
|
||||
@@ -269,13 +285,15 @@ class VikiIE(VikiBaseIE):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'timestamp': parse_iso8601(video.get('created_at')),
|
||||
'uploader': video.get('author'),
|
||||
'uploader_url': video.get('author_url'),
|
||||
'like_count': like_count,
|
||||
'age_limit': age_limit,
|
||||
'age_limit': parse_age_limit(video.get('rating')),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': subtitles,
|
||||
'episode_number': episode_number,
|
||||
}
|
||||
|
||||
formats = []
|
||||
@@ -360,7 +378,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'info_dict': {
|
||||
'id': '50c',
|
||||
'title': 'Boys Over Flowers',
|
||||
'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
|
||||
'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
|
||||
},
|
||||
'playlist_mincount': 71,
|
||||
}, {
|
||||
@@ -371,6 +389,7 @@ class VikiChannelIE(VikiBaseIE):
|
||||
'description': 'md5:05bf5471385aa8b21c18ad450e350525',
|
||||
},
|
||||
'playlist_count': 127,
|
||||
'skip': 'Page not found',
|
||||
}, {
|
||||
'url': 'http://www.viki.com/news/24569c-showbiz-korea',
|
||||
'only_matching': True,
|
||||
|
@@ -226,10 +226,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_original_format(self, url, video_id):
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None):
|
||||
query = {'action': 'load_download_config'}
|
||||
if unlisted_hash:
|
||||
query['unlisted_hash'] = unlisted_hash
|
||||
download_data = self._download_json(
|
||||
url, video_id, fatal=False,
|
||||
query={'action': 'load_download_config'},
|
||||
url, video_id, fatal=False, query=query,
|
||||
headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||
if download_data:
|
||||
source_file = download_data.get('source_file')
|
||||
@@ -509,6 +511,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
{
|
||||
'url': 'https://vimeo.com/160743502/abd0e13fb4',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# requires passing unlisted_hash(a52724358e) to load_download_config request
|
||||
'url': 'https://vimeo.com/392479337/a52724358e',
|
||||
'only_matching': True,
|
||||
}
|
||||
# https://gettingthingsdone.com/workflowmap/
|
||||
# vimeo embed with check-password page protected by Referer header
|
||||
@@ -673,7 +680,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(redirect_url, video_id, headers)
|
||||
|
||||
vod = config.get('video', {}).get('vod', {})
|
||||
video = config.get('video') or {}
|
||||
vod = video.get('vod') or {}
|
||||
|
||||
def is_rented():
|
||||
if '>You rented this title.<' in webpage:
|
||||
@@ -733,7 +741,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
formats = []
|
||||
|
||||
source_format = self._extract_original_format(
|
||||
'https://vimeo.com/' + video_id, video_id)
|
||||
'https://vimeo.com/' + video_id, video_id, video.get('unlisted_hash'))
|
||||
if source_format:
|
||||
formats.append(source_format)
|
||||
|
||||
|
@@ -116,7 +116,7 @@ class VLiveIE(VLiveBaseIE):
|
||||
headers={'Referer': 'https://www.vlive.tv/'}, query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
|
||||
self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
|
||||
raise
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
62
youtube_dl/extractor/vtm.py
Normal file
62
youtube_dl/extractor/vtm.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class VTMIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})'
|
||||
_TEST = {
|
||||
'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1',
|
||||
'md5': '37dca85fbc3a33f2de28ceb834b071f8',
|
||||
'info_dict': {
|
||||
'id': '192445',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gast vernielt Genkse hotelkamer',
|
||||
'timestamp': 1611060180,
|
||||
'upload_date': '20210119',
|
||||
'duration': 74,
|
||||
# TODO: fix url _type result processing
|
||||
# 'series': 'Op Interventie',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid = self._match_id(url)
|
||||
video = self._download_json(
|
||||
'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql',
|
||||
uuid, query={
|
||||
'query': '''{
|
||||
getComponent(type: Video, uuid: "%s") {
|
||||
... on Video {
|
||||
description
|
||||
duration
|
||||
myChannelsVideo
|
||||
program {
|
||||
title
|
||||
}
|
||||
publishedAt
|
||||
title
|
||||
}
|
||||
}
|
||||
}''' % uuid,
|
||||
}, headers={
|
||||
'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e',
|
||||
})['data']['getComponent']
|
||||
|
||||
return {
|
||||
'_type': 'url',
|
||||
'id': uuid,
|
||||
'title': video.get('title'),
|
||||
'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'],
|
||||
'description': video.get('description'),
|
||||
'timestamp': parse_iso8601(video.get('publishedAt')),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'series': try_get(video, lambda x: x['program']['title']),
|
||||
'ie_key': 'Medialaan',
|
||||
}
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -47,6 +48,22 @@ class VVVVIDIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# video_type == 'video/youtube'
|
||||
'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
|
||||
'md5': '33e0edfba720ad73a8782157fdebc648',
|
||||
'info_dict': {
|
||||
'id': 'RzmFKUDOUgw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trailer',
|
||||
'upload_date': '20150906',
|
||||
'description': 'md5:a5e802558d35247fee285875328c0b80',
|
||||
'uploader_id': 'BandaiVisual',
|
||||
'uploader': 'BANDAI NAMCO Arts Channel',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
|
||||
'only_matching': True
|
||||
@@ -154,12 +171,13 @@ class VVVVIDIE(InfoExtractor):
|
||||
if season_number:
|
||||
info['season_number'] = int(season_number)
|
||||
|
||||
for quality in ('_sd', ''):
|
||||
video_type = video_data.get('video_type')
|
||||
is_youtube = False
|
||||
for quality in ('', '_sd'):
|
||||
embed_code = video_data.get('embed_info' + quality)
|
||||
if not embed_code:
|
||||
continue
|
||||
embed_code = ds(embed_code)
|
||||
video_type = video_data.get('video_type')
|
||||
if video_type in ('video/rcs', 'video/kenc'):
|
||||
if video_type == 'video/kenc':
|
||||
kenc = self._download_json(
|
||||
@@ -172,19 +190,28 @@ class VVVVIDIE(InfoExtractor):
|
||||
if kenc_message:
|
||||
embed_code += '?' + ds(kenc_message)
|
||||
formats.extend(self._extract_akamai_formats(embed_code, video_id))
|
||||
elif video_type == 'video/youtube':
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'url': embed_code,
|
||||
})
|
||||
is_youtube = True
|
||||
break
|
||||
else:
|
||||
formats.extend(self._extract_wowza_formats(
|
||||
'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
|
||||
metadata_from_url(embed_code)
|
||||
|
||||
self._sort_formats(formats)
|
||||
if not is_youtube:
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
|
||||
metadata_from_url(video_data.get('thumbnail'))
|
||||
info.update(self._extract_common_video_info(video_data))
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(video_data.get('length')),
|
||||
'series': video_data.get('show_title'),
|
||||
'season_id': season_id,
|
||||
|
@@ -1,40 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
month_by_abbreviation,
|
||||
parse_filesize,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class XboxClipsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?xboxclips\.com/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\w-]{36})'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:xboxclips\.com|gameclips\.io)/(?:video\.php\?.*vid=|[^/]+/)(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://xboxclips.com/video.php?uid=2533274823424419&gamertag=Iabdulelah&vid=074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'md5': 'fbe1ec805e920aeb8eced3c3e657df5d',
|
||||
'info_dict': {
|
||||
'id': '074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'ext': 'mp4',
|
||||
'title': 'Iabdulelah playing Titanfall',
|
||||
'title': 'iAbdulElah playing Titanfall',
|
||||
'filesize_approx': 26800000,
|
||||
'upload_date': '20140807',
|
||||
'duration': 56,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
if '/video.php' in url:
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0])
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'>(?:Link|Download): <a[^>]+href="([^"]+)"', webpage, 'video URL')
|
||||
title = self._html_search_regex(
|
||||
r'<title>XboxClips \| ([^<]+)</title>', webpage, 'title')
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'>Recorded: ([^<]+)<', webpage, 'upload date', fatal=False))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||
upload_date = None
|
||||
mobj = re.search(
|
||||
r'>Recorded: (\d{2})-(Jan|Feb|Mar|Apr|May|Ju[nl]|Aug|Sep|Oct|Nov|Dec)-(\d{4})',
|
||||
webpage)
|
||||
if mobj:
|
||||
upload_date = '%s%.2d%s' % (mobj.group(3), month_by_abbreviation(mobj.group(2)), mobj.group(1))
|
||||
filesize = parse_filesize(self._html_search_regex(
|
||||
r'>Size: ([^<]+)<', webpage, 'file size', fatal=False))
|
||||
duration = int_or_none(self._html_search_regex(
|
||||
@@ -42,12 +57,12 @@ class XboxClipsIE(InfoExtractor):
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'>Views: (\d+)<', webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'filesize_approx': filesize,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
@@ -11,11 +11,14 @@ from ..utils import (
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -146,36 +149,89 @@ class XHamsterIE(InfoExtractor):
|
||||
video = initials['videoModel']
|
||||
title = video['title']
|
||||
formats = []
|
||||
for format_id, formats_dict in video['sources'].items():
|
||||
format_urls = set()
|
||||
format_sizes = {}
|
||||
sources = try_get(video, lambda x: x['sources'], dict) or {}
|
||||
for format_id, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
download_sources = try_get(sources, lambda x: x['download'], dict) or {}
|
||||
for quality, format_dict in download_sources.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_sizes[quality] = float_or_none(format_dict.get('size'))
|
||||
for quality, format_item in formats_dict.items():
|
||||
if format_id == 'download':
|
||||
# Download link takes some time to be generated,
|
||||
# skipping for now
|
||||
continue
|
||||
if not isinstance(format_item, dict):
|
||||
continue
|
||||
format_url = format_item.get('link')
|
||||
filesize = int_or_none(
|
||||
format_item.get('size'), invscale=1000000)
|
||||
else:
|
||||
format_url = format_item
|
||||
filesize = None
|
||||
format_url = format_item
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
if not format_url or format_url in format_urls:
|
||||
continue
|
||||
format_urls.add(format_url)
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, quality),
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_url, 'mp4'),
|
||||
'height': get_height(quality),
|
||||
'filesize': filesize,
|
||||
'filesize': format_sizes.get(quality),
|
||||
'http_headers': {
|
||||
'Referer': urlh.geturl(),
|
||||
},
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
xplayer_sources = try_get(
|
||||
initials, lambda x: x['xplayerSettings']['sources'], dict)
|
||||
if xplayer_sources:
|
||||
hls_sources = xplayer_sources.get('hls')
|
||||
if isinstance(hls_sources, dict):
|
||||
for hls_format_key in ('url', 'fallback'):
|
||||
hls_url = hls_sources.get(hls_format_key)
|
||||
if not hls_url:
|
||||
continue
|
||||
hls_url = urljoin(url, hls_url)
|
||||
if not hls_url or hls_url in format_urls:
|
||||
continue
|
||||
format_urls.add(hls_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
standard_sources = xplayer_sources.get('standard')
|
||||
if isinstance(standard_sources, dict):
|
||||
for format_id, formats_list in standard_sources.items():
|
||||
if not isinstance(formats_list, list):
|
||||
continue
|
||||
for standard_format in formats_list:
|
||||
if not isinstance(standard_format, dict):
|
||||
continue
|
||||
for standard_format_key in ('url', 'fallback'):
|
||||
standard_url = standard_format.get(standard_format_key)
|
||||
if not standard_url:
|
||||
continue
|
||||
standard_url = urljoin(url, standard_url)
|
||||
if not standard_url or standard_url in format_urls:
|
||||
continue
|
||||
format_urls.add(standard_url)
|
||||
ext = determine_ext(standard_url, 'mp4')
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
standard_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
quality = (str_or_none(standard_format.get('quality'))
|
||||
or str_or_none(standard_format.get('label'))
|
||||
or '')
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (format_id, quality),
|
||||
'url': standard_url,
|
||||
'ext': ext,
|
||||
'height': get_height(quality),
|
||||
'filesize': format_sizes.get(quality),
|
||||
'http_headers': {
|
||||
'Referer': standard_url,
|
||||
},
|
||||
})
|
||||
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
categories_list = video.get('categories')
|
||||
if isinstance(categories_list, list):
|
||||
|
@@ -1,8 +1,9 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
@@ -209,17 +210,27 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
|
||||
missing_track_ids = [
|
||||
track_id for track_id in track_ids
|
||||
if track_id not in present_track_ids]
|
||||
missing_tracks = self._call_api(
|
||||
'track-entries', tld, url, item_id,
|
||||
'Downloading missing tracks JSON', {
|
||||
'entries': ','.join(missing_track_ids),
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
'strict': 'true',
|
||||
})
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
# Request missing tracks in chunks to avoid exceeding max HTTP header size,
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/27355
|
||||
_TRACKS_PER_CHUNK = 250
|
||||
for chunk_num in itertools.count(0):
|
||||
start = chunk_num * _TRACKS_PER_CHUNK
|
||||
end = start + _TRACKS_PER_CHUNK
|
||||
missing_track_ids_req = missing_track_ids[start:end]
|
||||
assert missing_track_ids_req
|
||||
missing_tracks = self._call_api(
|
||||
'track-entries', tld, url, item_id,
|
||||
'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
|
||||
'entries': ','.join(missing_track_ids_req),
|
||||
'lang': tld,
|
||||
'external-domain': 'music.yandex.%s' % tld,
|
||||
'overembed': 'false',
|
||||
'strict': 'true',
|
||||
})
|
||||
if missing_tracks:
|
||||
tracks.extend(missing_tracks)
|
||||
if end >= len(missing_track_ids):
|
||||
break
|
||||
|
||||
return tracks
|
||||
|
||||
|
@@ -25,6 +25,7 @@ class YouPornIE(InfoExtractor):
|
||||
'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
|
||||
'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 210,
|
||||
'uploader': 'Ask Dan And Jennifer',
|
||||
'upload_date': '20101217',
|
||||
'average_rating': int,
|
||||
@@ -54,6 +55,7 @@ class YouPornIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404',
|
||||
}, {
|
||||
'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
||||
'only_matching': True,
|
||||
@@ -153,6 +155,8 @@ class YouPornIE(InfoExtractor):
|
||||
thumbnail = self._search_regex(
|
||||
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
|
||||
webpage, 'thumbnail', fatal=False, group='thumbnail')
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'video:duration', webpage, 'duration', fatal=False))
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
|
||||
@@ -194,6 +198,7 @@ class YouPornIE(InfoExtractor):
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'average_rating': average_rating,
|
||||
|
File diff suppressed because it is too large
Load Diff
69
youtube_dl/extractor/zhihu.py
Normal file
69
youtube_dl/extractor/zhihu.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none
|
||||
|
||||
|
||||
class ZhihuIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
|
||||
'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
|
||||
'info_dict': {
|
||||
'id': '1342930761977176064',
|
||||
'ext': 'mp4',
|
||||
'title': '写春联也太难了吧!',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'uploader': '桥半舫',
|
||||
'timestamp': 1612959715,
|
||||
'upload_date': '20210210',
|
||||
'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
|
||||
'duration': 146.333,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
zvideo = self._download_json(
|
||||
'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
|
||||
title = zvideo['title']
|
||||
video = zvideo.get('video') or {}
|
||||
|
||||
formats = []
|
||||
for format_id, q in (video.get('playlist') or {}).items():
|
||||
play_url = q.get('url') or q.get('play_url')
|
||||
if not play_url:
|
||||
continue
|
||||
formats.append({
|
||||
'asr': int_or_none(q.get('sample_rate')),
|
||||
'filesize': int_or_none(q.get('size')),
|
||||
'format_id': format_id,
|
||||
'fps': int_or_none(q.get('fps')),
|
||||
'height': int_or_none(q.get('height')),
|
||||
'tbr': float_or_none(q.get('bitrate')),
|
||||
'url': play_url,
|
||||
'width': int_or_none(q.get('width')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
author = zvideo.get('author') or {}
|
||||
url_token = author.get('url_token')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
|
||||
'uploader': author.get('name'),
|
||||
'timestamp': int_or_none(zvideo.get('published_at')),
|
||||
'uploader_id': author.get('id'),
|
||||
'uploader_url': 'https://www.zhihu.com/people/' + url_token if url_token else None,
|
||||
'duration': float_or_none(video.get('duration')),
|
||||
'view_count': int_or_none(zvideo.get('play_count')),
|
||||
'like_count': int_or_none(zvideo.get('liked_count')),
|
||||
'comment_count': int_or_none(zvideo.get('comment_count')),
|
||||
}
|
@@ -87,11 +87,16 @@ class ZypeIE(InfoExtractor):
|
||||
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
|
||||
body, 'm3u8 url', group='url', default=None)
|
||||
if not m3u8_url:
|
||||
source = self._parse_json(self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
|
||||
'source'), video_id, js_to_json)
|
||||
if source.get('integration') == 'verizon-media':
|
||||
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
|
||||
source = self._search_regex(
|
||||
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')
|
||||
|
||||
def get_attr(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
|
||||
source, key, group='val')
|
||||
|
||||
if get_attr('integration') == 'verizon-media':
|
||||
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
|
||||
text_tracks = self._search_regex(
|
||||
|
@@ -89,10 +89,14 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
|
||||
elif info['ext'] in ['m4a', 'mp4']:
|
||||
if not check_executable('AtomicParsley', ['-v']):
|
||||
atomicparsley = next((x
|
||||
for x in ['AtomicParsley', 'atomicparsley']
|
||||
if check_executable(x, ['-v'])), None)
|
||||
|
||||
if atomicparsley is None:
|
||||
raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
|
||||
|
||||
cmd = [encodeFilename('AtomicParsley', True),
|
||||
cmd = [encodeFilename(atomicparsley, True),
|
||||
encodeFilename(filename, True),
|
||||
encodeArgument('--artwork'),
|
||||
encodeFilename(thumbnail_filename, True),
|
||||
|
@@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.01.24'
|
||||
__version__ = '2021.02.22'
|
||||
|
Reference in New Issue
Block a user