mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-15 04:38:36 +09:00
Compare commits
385 Commits
2020.01.01
...
3a78198a96
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3a78198a96 | ||
![]() |
836c810716 | ||
![]() |
97c5be383c | ||
![]() |
a7ea88537a | ||
![]() |
6c35de4c6b | ||
![]() |
579d43951d | ||
![]() |
e24ebeafd1 | ||
![]() |
01c92973dd | ||
![]() |
f4415faa46 | ||
![]() |
a86ce9d7a1 | ||
![]() |
37258c644f | ||
![]() |
d0512ac4c5 | ||
![]() |
da4eaa15a2 | ||
![]() |
191286265d | ||
![]() |
3234272818 | ||
![]() |
9d2c90354f | ||
![]() |
316b10855a | ||
![]() |
484fe78737 | ||
![]() |
2cd43a00d1 | ||
![]() |
dd0f524c69 | ||
![]() |
c84f9475b8 | ||
![]() |
15f2734791 | ||
![]() |
cb6e24f946 | ||
![]() |
9d531aa291 | ||
![]() |
e9cbb98a0f | ||
![]() |
193422e12a | ||
![]() |
c4cabf040e | ||
![]() |
f23eceebbf | ||
![]() |
650bd8f623 | ||
![]() |
5d8cb4367d | ||
![]() |
82abc13aed | ||
![]() |
1e72660c9b | ||
![]() |
049f224248 | ||
![]() |
0ada1b90b8 | ||
![]() |
71ddc222ad | ||
![]() |
21292c0649 | ||
![]() |
46a265a2da | ||
![]() |
e2096776b9 | ||
![]() |
a1c88c4819 | ||
![]() |
51ca93d751 | ||
![]() |
a7e0531999 | ||
![]() |
ab0eda99e1 | ||
![]() |
ec99f47108 | ||
![]() |
b31b5f4434 | ||
![]() |
86f2fa1590 | ||
![]() |
7bc7fbce23 | ||
![]() |
a78e530c14 | ||
![]() |
af7bb684c0 | ||
![]() |
dd9e0f58f3 | ||
![]() |
59e583f7e8 | ||
![]() |
daa25d4142 | ||
![]() |
25a35cb38a | ||
![]() |
2cf8003638 | ||
![]() |
cf1a8668e8 | ||
![]() |
2dbb45ae82 | ||
![]() |
4fe190df70 | ||
![]() |
039e715b30 | ||
![]() |
32152bab7a | ||
![]() |
b1347a5881 | ||
![]() |
91e954587f | ||
![]() |
9b505185da | ||
![]() |
444a68e0ec | ||
![]() |
c7178f0f7a | ||
![]() |
5c3f7014ef | ||
![]() |
d65628ef03 | ||
![]() |
8a6c5b0806 | ||
![]() |
9360936f26 | ||
![]() |
bb2b89e077 | ||
![]() |
2d7a29081c | ||
![]() |
2864179293 | ||
![]() |
284f8306df | ||
![]() |
aa613ef7e1 | ||
![]() |
cb2b9a22a5 | ||
![]() |
5b867c15a8 | ||
![]() |
f8c749f12c | ||
![]() |
7d509c613b | ||
![]() |
b92e95aa01 | ||
![]() |
e3cad6bd99 | ||
![]() |
6699b6ce41 | ||
![]() |
91dcde8a38 | ||
![]() |
11f3471c4b | ||
![]() |
f4093b34f6 | ||
![]() |
a80b23c373 | ||
![]() |
2e7fa18bb9 | ||
![]() |
fe07e788bf | ||
![]() |
6d3bdcf217 | ||
![]() |
ec2a2ab441 | ||
![]() |
9448a20312 | ||
![]() |
3f1748b944 | ||
![]() |
fe13087cd1 | ||
![]() |
f22fa82d7f | ||
![]() |
059fa9aa81 | ||
![]() |
650aec4a98 | ||
![]() |
2ea9c97432 | ||
![]() |
efc589b865 | ||
![]() |
1737ea69b9 | ||
![]() |
1fb034d029 | ||
![]() |
28f9568a84 | ||
![]() |
5bbdadd5f8 | ||
![]() |
2de2ca6659 | ||
![]() |
051071203c | ||
![]() |
34299510bb | ||
![]() |
b9bceba37c | ||
![]() |
6d4733ce7b | ||
![]() |
416da574ec | ||
![]() |
48c5663c5f | ||
![]() |
7d740e7dc7 | ||
![]() |
4eda10499e | ||
![]() |
605535776a | ||
![]() |
1050e0d09f | ||
![]() |
d65d89183f | ||
![]() |
0c92f1e96b | ||
![]() |
adae9e844b | ||
![]() |
c5764b3f89 | ||
![]() |
0837992a22 | ||
![]() |
b55715934b | ||
![]() |
bbc3b5b4bb | ||
![]() |
1ca5f821c8 | ||
![]() |
defc820b70 | ||
![]() |
82ef02e936 | ||
![]() |
b856b3997c | ||
![]() |
cd85a1bb8b | ||
![]() |
ce5b904050 | ||
![]() |
ad06b99dd4 | ||
![]() |
540b9f5164 | ||
![]() |
6e65a2a67e | ||
![]() |
f8c7bed133 | ||
![]() |
cdc55e666f | ||
![]() |
86b7c00adc | ||
![]() |
e8c5d40bc8 | ||
![]() |
ca7ebc4e5e | ||
![]() |
bff857a8af | ||
![]() |
a31a022efd | ||
![]() |
45f6362464 | ||
![]() |
97f34a48d7 | ||
![]() |
ea74e00b3a | ||
![]() |
06cd4cdb25 | ||
![]() |
da2069fb22 | ||
![]() |
95c9810015 | ||
![]() |
b03eebdb6a | ||
![]() |
1f7675451c | ||
![]() |
aa27253556 | ||
![]() |
d51e23d9fc | ||
![]() |
6cd452acff | ||
![]() |
50e9fcc1fd | ||
![]() |
16ee69c1b7 | ||
![]() |
67171ed7e9 | ||
![]() |
1d9bf655e6 | ||
![]() |
62ae19ff76 | ||
![]() |
5ed05f26ad | ||
![]() |
841b683804 | ||
![]() |
f5863a3ea0 | ||
![]() |
10709fc7c6 | ||
![]() |
a7e348556a | ||
![]() |
6cb30ea5ed | ||
![]() |
a4ed50bb84 | ||
![]() |
570611955f | ||
![]() |
e450f6cb63 | ||
![]() |
a115e07594 | ||
![]() |
718393c632 | ||
![]() |
07af16b92e | ||
![]() |
e942cfd1a7 | ||
![]() |
9a7e5cb88a | ||
![]() |
2391941f28 | ||
![]() |
9ff6165a81 | ||
![]() |
1c748722f9 | ||
![]() |
ee0b726cd7 | ||
![]() |
dbeafce5d5 | ||
![]() |
ed604ce7bc | ||
![]() |
7adc7ca547 | ||
![]() |
a6211d237b | ||
![]() |
7b16239a49 | ||
![]() |
37357d21a9 | ||
![]() |
b477fc1314 | ||
![]() |
d84b21b427 | ||
![]() |
48bd042ce7 | ||
![]() |
84213ea8d4 | ||
![]() |
562de77f41 | ||
![]() |
e1723c4bac | ||
![]() |
607d204551 | ||
![]() |
a5b6102ea8 | ||
![]() |
b77888228d | ||
![]() |
0b1eaec3bc | ||
![]() |
b37e47a3f9 | ||
![]() |
ce3735df02 | ||
![]() |
a0455d0ffd | ||
![]() |
c8b232cc48 | ||
![]() |
b4eb0bc7bd | ||
![]() |
d5147b65ac | ||
![]() |
7b0b53ea69 | ||
![]() |
7016e24ebe | ||
![]() |
bef4688c72 | ||
![]() |
228c1d685b | ||
![]() |
efd72b05d2 | ||
![]() |
fe515e5c75 | ||
![]() |
1db5ab6b34 | ||
![]() |
2791e80b60 | ||
![]() |
8f841fafcd | ||
![]() |
a54c5f83c0 | ||
![]() |
cd13343ad8 | ||
![]() |
9cd5f54e31 | ||
![]() |
9a269547f2 | ||
![]() |
bf097a5077 | ||
![]() |
52c50a10af | ||
![]() |
b334732709 | ||
![]() |
384bf91f88 | ||
![]() |
fae11394f0 | ||
![]() |
adc13b0748 | ||
![]() |
327593257c | ||
![]() |
9d8f3a12a6 | ||
![]() |
b002bc433a | ||
![]() |
b74896dad1 | ||
![]() |
fa3db38333 | ||
![]() |
30fa5c6087 | ||
![]() |
6c907eb33f | ||
![]() |
f7b42518dc | ||
![]() |
ce7db64bf1 | ||
![]() |
1328305851 | ||
![]() |
6c22cee673 | ||
![]() |
6d874fee2a | ||
![]() |
676723e0da | ||
![]() |
c380cc28c4 | ||
![]() |
f7f304910d | ||
![]() |
00a41ca4c3 | ||
![]() |
66f32ca0e1 | ||
![]() |
6ffc3cf74a | ||
![]() |
4433bb0245 | ||
![]() |
e40c758c2a | ||
![]() |
011e75e641 | ||
![]() |
2468a6fa64 | ||
![]() |
700265bfcf | ||
![]() |
c97f5e934f | ||
![]() |
38db9a405a | ||
![]() |
2cdfe977d7 | ||
![]() |
46d0baf941 | ||
![]() |
00eb865b3c | ||
![]() |
2f19835726 | ||
![]() |
533f3e3557 | ||
![]() |
75294a5ed0 | ||
![]() |
b9e5f87291 | ||
![]() |
6b09401b0b | ||
![]() |
5caf88ccb4 | ||
![]() |
dcc8522fdb | ||
![]() |
c9595ee780 | ||
![]() |
91bd3bd019 | ||
![]() |
13b08034b5 | ||
![]() |
6a6e1a0cd8 | ||
![]() |
4e7b5bba5f | ||
![]() |
52c4c51556 | ||
![]() |
8fae1a04eb | ||
![]() |
d44a707fdd | ||
![]() |
049c0486bb | ||
![]() |
30b5121a1c | ||
![]() |
b439634f0e | ||
![]() |
6e47200b6e | ||
![]() |
38fa761a45 | ||
![]() |
08a27407c4 | ||
![]() |
be7dacf9cf | ||
![]() |
4560adc820 | ||
![]() |
63dce3094b | ||
![]() |
b4eb08bb03 | ||
![]() |
2e20cb3636 | ||
![]() |
a6c5859d6b | ||
![]() |
c76cdf2382 | ||
![]() |
787c360467 | ||
![]() |
73453430c1 | ||
![]() |
158bc5ac03 | ||
![]() |
4568a11802 | ||
![]() |
4cbce88f8b | ||
![]() |
541fe3eaff | ||
![]() |
9bfe088594 | ||
![]() |
fcaf4d7a06 | ||
![]() |
40b6495d40 | ||
![]() |
f1a8511f7b | ||
![]() |
042b664933 | ||
![]() |
68fa15155f | ||
![]() |
434f573046 | ||
![]() |
cff99c91d1 | ||
![]() |
fa9b8c6628 | ||
![]() |
ea782aca52 | ||
![]() |
43ebf77df3 | ||
![]() |
d332ec725d | ||
![]() |
f93abcf1da | ||
![]() |
0ec9d4e565 | ||
![]() |
34525a3885 | ||
![]() |
2db9ac228d | ||
![]() |
5429d6a9cb | ||
![]() |
dc879c5a37 | ||
![]() |
12ee431676 | ||
![]() |
46cc54ca8f | ||
![]() |
1e1c1960aa | ||
![]() |
ac379fa236 | ||
![]() |
0e30a7b973 | ||
![]() |
3b5399ce0f | ||
![]() |
1c45ff5572 | ||
![]() |
669625a32c | ||
![]() |
170f5b7c27 | ||
![]() |
b274e48d56 | ||
![]() |
50d19895a1 | ||
![]() |
6d475d01d8 | ||
![]() |
f8cbd8c963 | ||
![]() |
838f051c4b | ||
![]() |
e88b450771 | ||
![]() |
278355bae4 | ||
![]() |
b4cbdbd4b3 | ||
![]() |
ea17979d83 | ||
![]() |
886d985959 | ||
![]() |
7947a1f7db | ||
![]() |
fca6dba8b8 | ||
![]() |
e2f8bf5888 | ||
![]() |
b76f0e58f7 | ||
![]() |
bee6451fe8 | ||
![]() |
00d798b7c2 | ||
![]() |
fda6d237a5 | ||
![]() |
5d9f6cbc5a | ||
![]() |
97c822b3d5 | ||
![]() |
117ba9e9df | ||
![]() |
0d718db623 | ||
![]() |
7bf27721d6 | ||
![]() |
f6052ec923 | ||
![]() |
4e9e1e240d | ||
![]() |
e0abaab293 | ||
![]() |
de1121d749 | ||
![]() |
293c9f0186 | ||
![]() |
06f1de2daf | ||
![]() |
b68a6e32fb | ||
![]() |
8cd809fb3d | ||
![]() |
d6aa1db7ed | ||
![]() |
f377edec06 | ||
![]() |
bfe2b8cf2a | ||
![]() |
82fea5b42e | ||
![]() |
fffc618c51 | ||
![]() |
705b1cda99 | ||
![]() |
7d55b62ff2 | ||
![]() |
0d006fac5c | ||
![]() |
00de61a98f | ||
![]() |
d95a1cc98e | ||
![]() |
4935749730 | ||
![]() |
51c7f40c83 | ||
![]() |
4877ffc0e9 | ||
![]() |
8e4d3f83ce | ||
![]() |
43e7994749 | ||
![]() |
2a5c26c980 | ||
![]() |
76dbe4df5f | ||
![]() |
bffdedfabd | ||
![]() |
c3cfea9068 | ||
![]() |
22cb94902f | ||
![]() |
be96f9924f | ||
![]() |
9cf30dc017 | ||
![]() |
f4a18db748 | ||
![]() |
fd032450f0 | ||
![]() |
a4b2769451 | ||
![]() |
d9a2f86791 | ||
![]() |
c968f738df | ||
![]() |
48ff5590c1 | ||
![]() |
2c482bff7c | ||
![]() |
a9866c0366 | ||
![]() |
90ea83c64d | ||
![]() |
e4e5fa6e3c | ||
![]() |
e8cf0dbdd8 | ||
![]() |
d7c55f226d | ||
![]() |
bfdc8340c9 | ||
![]() |
14bb191634 | ||
![]() |
628e5bc0b7 | ||
![]() |
3fc56635b7 | ||
![]() |
bd2c211fcc | ||
![]() |
10a5091e58 | ||
![]() |
aca2fd222f | ||
![]() |
9ba179c1fa | ||
![]() |
3fdf573148 | ||
![]() |
d4e0cd69ef | ||
![]() |
483b858d49 | ||
![]() |
a71c1d1a5a | ||
![]() |
838171630d | ||
![]() |
c88debff5d | ||
![]() |
3cb05b86de | ||
![]() |
b2771a2853 | ||
![]() |
7bac77413d | ||
![]() |
0264903574 | ||
![]() |
2f7aa680b7 | ||
![]() |
0d2306d02b | ||
![]() |
233826f68f | ||
![]() |
259ad38173 | ||
![]() |
44b434e4e3 | ||
![]() |
484637a9cc |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.01**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.01.01
|
||||
[debug] youtube-dl version 2020.11.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.01**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.01**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.01**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2020.01.01
|
||||
[debug] youtube-dl version 2020.11.24
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.01. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.11.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.01**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2020.11.24**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
@@ -13,7 +13,7 @@ dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
matrix:
|
||||
jobs:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
@@ -35,6 +35,11 @@ matrix:
|
||||
env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
- name: flake8
|
||||
python: 3.8
|
||||
dist: xenial
|
||||
install: pip install flake8
|
||||
script: flake8 .
|
||||
fast_finish: true
|
||||
allow_failures:
|
||||
- env: YTDL_TEST_SET=download
|
||||
|
@@ -153,7 +153,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
|
510
ChangeLog
510
ChangeLog
@@ -1,3 +1,481 @@
|
||||
version 2020.11.24
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add generic support for akamai HTTP format extraction
|
||||
|
||||
Extractors
|
||||
* [youtube:tab] Fix feeds extraction (#25695, #26452)
|
||||
* [youtube:favorites] Restore extractor
|
||||
* [youtube:tab] Fix some weird typo (#27157)
|
||||
+ [pinterest] Add support for large collections (more than 25 pins)
|
||||
+ [franceinter] Extract thumbnail (#27153)
|
||||
+ [box] Add support for box.com (#5949)
|
||||
+ [nytimes] Add support for cooking.nytimes.com (#27112, #27143)
|
||||
* [lbry] Relax URL regular expression (#27144)
|
||||
+ [rumble] Add support for embed pages (#10785)
|
||||
+ [skyit] Add support for multiple Sky Italia websites (#26629)
|
||||
+ [pinterest] Add support for pinterest.com (#25747)
|
||||
|
||||
|
||||
version 2020.11.21.1
|
||||
|
||||
Core
|
||||
* [downloader/http] Fix crash during urlopen caused by missing reason
|
||||
of URLError
|
||||
* [YoutubeDL] Fix --ignore-errors for playlists with generator-based entries
|
||||
of url_transparent (#27064)
|
||||
|
||||
Extractors
|
||||
+ [svtplay] Add support for svt.se/barnkanalen (#24817)
|
||||
+ [svt] Extract timestamp (#27130)
|
||||
* [svtplay] Improve thumbnail extraction (#27130)
|
||||
* [youtube] Fix error reason extraction (#27081)
|
||||
* [youtube] Fix like and dislike count extraction (#25977)
|
||||
+ [youtube:tab] Add support for current video and fix lives extraction (#27126)
|
||||
* [infoq] Fix format extraction (#25984)
|
||||
* [francetv] Update to fix thumbnail URL issue (#27120)
|
||||
* [youtube] Improve yt initial data extraction (#27093)
|
||||
+ [discoverynetworks] Add support new TLC/DMAX URLs (#27100)
|
||||
* [rai] Fix protocol relative relinker URLs (#22766)
|
||||
* [rai] Fix unavailable video format detection
|
||||
* [rai] Improve extraction
|
||||
* [rai] Fix extraction (#27077)
|
||||
* [viki] Improve format extraction
|
||||
* [viki] Fix stream extraction from MPD (#27092)
|
||||
* [googledrive] Fix format extraction (#26979)
|
||||
+ [amara] Add support for amara.org (#20618)
|
||||
* [vimeo:album] Fix extraction (#27079)
|
||||
* [mtv] Fix mgid extraction (#26841)
|
||||
|
||||
|
||||
version 2020.11.19
|
||||
|
||||
Core
|
||||
* [extractor/common] Output error for invalid URLs in _is_valid_url (#21400,
|
||||
#24151, #25617, #25618, #25586, #26068, #27072)
|
||||
|
||||
Extractors
|
||||
* [youporn] Fix upload date extraction
|
||||
* [youporn] Make comment count optional (#26986)
|
||||
* [arte] Rework extractors
|
||||
* Reimplement embed and playlist extractors to delegate to the single
|
||||
entrypoint artetv extractor
|
||||
* Improve embeds detection (#27057)
|
||||
+ [arte] Extract m3u8 formats (#27061)
|
||||
* [mgtv] Fix format extraction (#26415)
|
||||
+ [lbry] Add support for odysee.com (#26806)
|
||||
* [francetv] Improve info extraction
|
||||
+ [francetv] Add fallback video URL extraction (#27047)
|
||||
|
||||
|
||||
version 2020.11.18
|
||||
|
||||
Extractors
|
||||
* [spiegel] Fix extraction (#24206, #24767)
|
||||
* [youtube] Improve extraction
|
||||
+ Add support for --no-playlist (#27009)
|
||||
* Improve playlist and mix extraction (#26390, #26509, #26534, #27011)
|
||||
+ Extract playlist uploader data
|
||||
* [youtube:tab] Fix view count extraction (#27051)
|
||||
* [malltv] Fix extraction (#27035)
|
||||
+ [bandcamp] Extract playlist description (#22684)
|
||||
* [urplay] Fix extraction (#26828)
|
||||
* [youtube:tab] Fix playlist title extraction (#27015)
|
||||
* [youtube] Fix chapters extraction (#26005)
|
||||
|
||||
|
||||
version 2020.11.17
|
||||
|
||||
Core
|
||||
* [utils] Skip ! prefixed code in js_to_json
|
||||
|
||||
Extractors
|
||||
* [youtube:tab] Fix extraction with cookies provided (#27005)
|
||||
* [lrt] Fix extraction with empty tags (#20264)
|
||||
+ [ndr:embed:base] Extract subtitles (#25447, #26106)
|
||||
+ [servus] Add support for pm-wissen.com (#25869)
|
||||
* [servus] Fix extraction (#26872, #26967, #26983, #27000)
|
||||
* [xtube] Fix extraction (#26996)
|
||||
* [lrt] Fix extraction
|
||||
+ [lbry] Add support for lbry.tv
|
||||
+ [condenast] Extract subtitles
|
||||
* [condenast] Fix extraction
|
||||
* [bandcamp] Fix extraction (#26681, #26684)
|
||||
* [rai] Fix RaiPlay extraction (#26064, #26096)
|
||||
* [vlive] Fix extraction
|
||||
* [usanetwork] Fix extraction
|
||||
* [nbc] Fix NBCNews/Today/MSNBC extraction
|
||||
* [cnbc] Fix extraction
|
||||
|
||||
|
||||
version 2020.11.12
|
||||
|
||||
Extractors
|
||||
* [youtube] Rework extractors
|
||||
|
||||
|
||||
version 2020.11.01
|
||||
|
||||
Core
|
||||
* [utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
|
||||
* [downloader/http] Properly handle missing message in SSLError (#26646)
|
||||
* [downloader/http] Fix access to not yet opened stream in retry
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix JS player URL extraction
|
||||
* [ytsearch] Fix extraction (#26920)
|
||||
* [afreecatv] Fix typo (#26970)
|
||||
* [23video] Relax URL regular expression (#26870)
|
||||
+ [ustream] Add support for video.ibm.com (#26894)
|
||||
* [iqiyi] Fix typo (#26884)
|
||||
+ [expressen] Add support for di.se (#26670)
|
||||
* [iprima] Improve video id extraction (#26507, #26494)
|
||||
|
||||
|
||||
version 2020.09.20
|
||||
|
||||
Core
|
||||
* [extractor/common] Relax interaction count extraction in _json_ld
|
||||
+ [extractor/common] Extract author as uploader for VideoObject in _json_ld
|
||||
* [downloader/hls] Fix incorrect end byte in Range HTTP header for
|
||||
media segments with EXT-X-BYTERANGE (#14748, #24512)
|
||||
* [extractor/common] Handle ssl.CertificateError in _request_webpage (#26601)
|
||||
* [downloader/http] Improve timeout detection when reading block of data
|
||||
(#10935)
|
||||
* [downloader/http] Retry download when urlopen times out (#10935, #26603)
|
||||
|
||||
Extractors
|
||||
* [redtube] Extend URL regular expression (#26506)
|
||||
* [twitch] Refactor
|
||||
* [twitch:stream] Switch to GraphQL and fix reruns (#26535)
|
||||
+ [telequebec] Add support for brightcove videos (#25833)
|
||||
* [pornhub] Extract metadata from JSON-LD (#26614)
|
||||
* [pornhub] Fix view count extraction (#26621, #26614)
|
||||
|
||||
|
||||
version 2020.09.14
|
||||
|
||||
Core
|
||||
+ [postprocessor/embedthumbnail] Add support for non jpg/png thumbnails
|
||||
(#25687, #25717)
|
||||
|
||||
Extractors
|
||||
* [rtlnl] Extend URL regular expression (#26549, #25821)
|
||||
* [youtube] Fix empty description extraction (#26575, #26006)
|
||||
* [srgssr] Extend URL regular expression (#26555, #26556, #26578)
|
||||
* [googledrive] Use redirect URLs for source format (#18877, #23919, #24689,
|
||||
#26565)
|
||||
* [svtplay] Fix id extraction (#26576)
|
||||
* [redbulltv] Improve support for rebull.com TV localized URLs (#22063)
|
||||
+ [redbulltv] Add support for new redbull.com TV URLs (#22037, #22063)
|
||||
* [soundcloud:pagedplaylist] Reduce pagination limit (#26557)
|
||||
|
||||
|
||||
version 2020.09.06
|
||||
|
||||
Core
|
||||
+ [utils] Recognize wav mimetype (#26463)
|
||||
|
||||
Extractors
|
||||
* [nrktv:episode] Improve video id extraction (#25594, #26369, #26409)
|
||||
* [youtube] Fix age gate content detection (#26100, #26152, #26311, #26384)
|
||||
* [youtube:user] Extend URL regular expression (#26443)
|
||||
* [xhamster] Improve initials regular expression (#26526, #26353)
|
||||
* [svtplay] Fix video id extraction (#26425, #26428, #26438)
|
||||
* [twitch] Rework extractors (#12297, #20414, #20604, #21811, #21812, #22979,
|
||||
#24263, #25010, #25553, #25606)
|
||||
* Switch to GraphQL
|
||||
+ Add support for collections
|
||||
+ Add support for clips and collections playlists
|
||||
* [biqle] Improve video ext extraction
|
||||
* [xhamster] Fix extraction (#26157, #26254)
|
||||
* [xhamster] Extend URL regular expression (#25789, #25804, #25927))
|
||||
|
||||
|
||||
version 2020.07.28
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
|
||||
* [youtube] Improve description extraction (#25937, #25980)
|
||||
* [wistia] Restrict embed regular expression (#25969)
|
||||
* [youtube] Prevent excess HTTP 301 (#25786)
|
||||
+ [youtube:playlists] Extend URL regular expression (#25810)
|
||||
+ [bellmedia] Add support for cp24.com clip URLs (#25764)
|
||||
* [brightcove] Improve embed detection (#25674)
|
||||
|
||||
|
||||
version 2020.06.16.1
|
||||
|
||||
Extractors
|
||||
* [youtube] Force old layout (#25682, #25683, #25680, #25686)
|
||||
* [youtube] Fix categories and improve tags extraction
|
||||
|
||||
|
||||
version 2020.06.16
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix uploader id and uploader URL extraction
|
||||
* [youtube] Improve view count extraction
|
||||
* [youtube] Fix upload date extraction (#25677)
|
||||
* [youtube] Fix thumbnails extraction (#25676)
|
||||
* [youtube] Fix playlist and feed extraction (#25675)
|
||||
+ [facebook] Add support for single-video ID links
|
||||
+ [youtube] Extract chapters from JSON (#24819)
|
||||
+ [kaltura] Add support for multiple embeds on a webpage (#25523)
|
||||
|
||||
|
||||
version 2020.06.06
|
||||
|
||||
Extractors
|
||||
* [tele5] Bypass geo restriction
|
||||
+ [jwplatform] Add support for bypass geo restriction
|
||||
* [tele5] Prefer jwplatform over nexx (#25533)
|
||||
* [twitch:stream] Expect 400 and 410 HTTP errors from API
|
||||
* [twitch:stream] Fix extraction (#25528)
|
||||
* [twitch] Fix thumbnails extraction (#25531)
|
||||
+ [twitch] Pass v5 Accept HTTP header (#25531)
|
||||
* [brightcove] Fix subtitles extraction (#25540)
|
||||
+ [malltv] Add support for sk.mall.tv (#25445)
|
||||
* [periscope] Fix untitled broadcasts (#25482)
|
||||
* [jwplatform] Improve embeds extraction (#25467)
|
||||
|
||||
|
||||
version 2020.05.29
|
||||
|
||||
Core
|
||||
* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
|
||||
* [utils] Fix file permissions in write_json_file (#12471, #25122)
|
||||
|
||||
Extractors
|
||||
* [ard:beta] Extend URL regular expression (#25405)
|
||||
+ [youtube] Add support for more invidious instances (#25417)
|
||||
* [giantbomb] Extend URL regular expression (#25222)
|
||||
* [ard] Improve URL regular expression (#25134, #25198)
|
||||
* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
|
||||
#25321)
|
||||
* [indavideo] Switch to HTTPS for API request (#25191)
|
||||
* [redtube] Improve title extraction (#25208)
|
||||
* [vimeo] Improve format extraction and sorting (#25285)
|
||||
* [soundcloud] Reduce API playlist page limit (#25274)
|
||||
+ [youtube] Add support for yewtu.be (#25226)
|
||||
* [mailru] Fix extraction (#24530, #25239)
|
||||
* [bellator] Fix mgid extraction (#25195)
|
||||
|
||||
|
||||
version 2020.05.08
|
||||
|
||||
Core
|
||||
* [downloader/http] Request last data block of exact remaining size
|
||||
* [downloader/http] Finish downloading once received data length matches
|
||||
expected
|
||||
* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
|
||||
ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
|
||||
+ [compat] Introduce compat_cookiejar_Cookie
|
||||
* [utils] Improve cookie files support
|
||||
+ Add support for UTF-8 in cookie files
|
||||
* Skip malformed cookie file entries instead of crashing (invalid entry
|
||||
length, invalid expires at)
|
||||
|
||||
Extractors
|
||||
* [youtube] Improve signature cipher extraction (#25187, #25188)
|
||||
* [iprima] Improve extraction (#25138)
|
||||
* [uol] Fix extraction (#22007)
|
||||
+ [orf] Add support for more radio stations (#24938, #24968)
|
||||
* [dailymotion] Fix typo
|
||||
- [puhutv] Remove no longer available HTTP formats (#25124)
|
||||
|
||||
|
||||
version 2020.05.03
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract multiple JSON-LD entries
|
||||
* [options] Clarify doc on --exec command (#19087, #24883)
|
||||
* [extractor/common] Skip malformed ISM manifest XMLs while extracting
|
||||
ISM formats (#24667)
|
||||
|
||||
Extractors
|
||||
* [crunchyroll] Fix and improve extraction (#25096, #25060)
|
||||
* [youtube] Improve player id extraction
|
||||
* [youtube] Use redirected video id if any (#25063)
|
||||
* [yahoo] Fix GYAO Player extraction and relax URL regular expression
|
||||
(#24178, #24778)
|
||||
* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
|
||||
* [tenplay] Relax URL regular expression (#25001)
|
||||
+ [prosiebensat1] Extract series metadata
|
||||
* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
|
||||
- [prosiebensat1] Remove 7tv.de support (#24948)
|
||||
* [youtube] Fix DRM videos detection (#24736)
|
||||
* [thisoldhouse] Fix video id extraction (#24548, #24549)
|
||||
+ [soundcloud] Extract AAC format (#19173, #24708)
|
||||
* [youtube] Skip broken multifeed videos (#24711)
|
||||
* [nova:embed] Fix extraction (#24700)
|
||||
* [motherless] Fix extraction (#24699)
|
||||
* [twitch:clips] Extend URL regular expression (#24290, #24642)
|
||||
* [tv4] Fix ISM formats extraction (#24667)
|
||||
* [tele5] Fix extraction (#24553)
|
||||
+ [mofosex] Add support for generic embeds (#24633)
|
||||
+ [youporn] Add support for generic embeds
|
||||
+ [spankwire] Add support for generic embeds (#24633)
|
||||
* [spankwire] Fix extraction (#18924, #20648)
|
||||
|
||||
|
||||
version 2020.03.24
|
||||
|
||||
Core
|
||||
- [utils] Revert support for cookie files with spaces used instead of tabs
|
||||
|
||||
Extractors
|
||||
* [teachable] Update upskillcourses and gns3 domains
|
||||
* [generic] Look for teachable embeds before wistia
|
||||
+ [teachable] Extract chapter metadata (#24421)
|
||||
+ [bilibili] Add support for player.bilibili.com (#24402)
|
||||
+ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
|
||||
* [limelight] Remove disabled API requests (#24255)
|
||||
* [soundcloud] Fix download URL extraction (#24394)
|
||||
+ [cbc:watch] Add support for authentication (#19160)
|
||||
* [hellporno] Fix extraction (#24399)
|
||||
* [xtube] Fix formats extraction (#24348)
|
||||
* [ndr] Fix extraction (#24326)
|
||||
* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
|
||||
- [nhk] Remove obsolete rtmp formats (#24329)
|
||||
* [nhk] Relax URL regular expression (#24329)
|
||||
- [vimeo] Revert fix showcase password protected video extraction (#24224)
|
||||
|
||||
|
||||
version 2020.03.08
|
||||
|
||||
Core
|
||||
+ [utils] Add support for cookie files with spaces used instead of tabs
|
||||
|
||||
Extractors
|
||||
+ [pornhub] Add support for pornhubpremium.com (#24288)
|
||||
- [youtube] Remove outdated code and unnecessary requests
|
||||
* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
|
||||
* [nhk] Update API version (#24270)
|
||||
|
||||
|
||||
version 2020.03.06
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix age-gated videos support without login (#24248)
|
||||
* [vimeo] Fix showcase password protected video extraction (#24224)
|
||||
* [pornhub] Improve title extraction (#24184)
|
||||
* [peertube] Improve extraction (#23657)
|
||||
+ [servus] Add support for new URL schema (#23475, #23583, #24142)
|
||||
* [vimeo] Fix subtitles URLs (#24209)
|
||||
|
||||
|
||||
version 2020.03.01
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Force redirect URL to unicode on python 2
|
||||
- [options] Remove duplicate short option -v for --version (#24162)
|
||||
|
||||
Extractors
|
||||
* [xhamster] Fix extraction (#24205)
|
||||
* [franceculture] Fix extraction (#24204)
|
||||
+ [telecinco] Add support for article opening videos
|
||||
* [telecinco] Fix extraction (#24195)
|
||||
* [xtube] Fix metadata extraction (#21073, #22455)
|
||||
* [youjizz] Fix extraction (#24181)
|
||||
- Remove no longer needed compat_str around geturl
|
||||
* [pornhd] Fix extraction (#24128)
|
||||
+ [teachable] Add support for multiple videos per lecture (#24101)
|
||||
+ [wistia] Add support for multiple generic embeds (#8347, 11385)
|
||||
* [imdb] Fix extraction (#23443)
|
||||
* [tv2dk:bornholm:play] Fix extraction (#24076)
|
||||
|
||||
|
||||
version 2020.02.16
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
|
||||
#10622)
|
||||
* [update] Fix updating via symlinks (#23991)
|
||||
+ [compat] Introduce compat_realpath (#23991)
|
||||
|
||||
Extractors
|
||||
+ [npr] Add support for streams (#24042)
|
||||
+ [24video] Add support for porn.24video.net (#23779, #23784)
|
||||
- [jpopsuki] Remove extractor (#23858)
|
||||
* [nova] Improve extraction (#23690)
|
||||
* [nova:embed] Improve (#23690)
|
||||
* [nova:embed] Fix extraction (#23672)
|
||||
+ [abc:iview] Add support for 720p (#22907, #22921)
|
||||
* [nytimes] Improve format sorting (#24010)
|
||||
+ [toggle] Add support for mewatch.sg (#23895, #23930)
|
||||
* [thisoldhouse] Fix extraction (#23951)
|
||||
+ [popcorntimes] Add support for popcorntimes.tv (#23949)
|
||||
* [sportdeutschland] Update to new API
|
||||
* [twitch:stream] Lowercase channel id for stream request (#23917)
|
||||
* [tv5mondeplus] Fix extraction (#23907, #23911)
|
||||
* [tva] Relax URL regular expression (#23903)
|
||||
* [vimeo] Fix album extraction (#23864)
|
||||
* [viewlift] Improve extraction
|
||||
* Fix extraction (#23851)
|
||||
+ Add support for authentication
|
||||
+ Add support for more domains
|
||||
* [svt] Fix series extraction (#22297)
|
||||
* [svt] Fix article extraction (#22897, #22919)
|
||||
* [soundcloud] Improve private playlist/set tracks extraction (#3707)
|
||||
|
||||
|
||||
version 2020.01.24
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix sigfunc name extraction (#23819)
|
||||
* [stretchinternet] Fix extraction (#4319)
|
||||
* [voicerepublic] Fix extraction
|
||||
* [azmedien] Fix extraction (#23783)
|
||||
* [businessinsider] Fix jwplatform id extraction (#22929, #22954)
|
||||
+ [24video] Add support for 24video.vip (#23753)
|
||||
* [ivi:compilation] Fix entries extraction (#23770)
|
||||
* [ard] Improve extraction (#23761)
|
||||
* Simplify extraction
|
||||
+ Extract age limit and series
|
||||
* Bypass geo-restriction
|
||||
+ [nbc] Add support for nbc multi network URLs (#23049)
|
||||
* [americastestkitchen] Fix extraction
|
||||
* [zype] Improve extraction
|
||||
+ Extract subtitles (#21258)
|
||||
+ Support URLs with alternative keys/tokens (#21258)
|
||||
+ Extract more metadata
|
||||
* [orf:tvthek] Improve geo restricted videos detection (#23741)
|
||||
* [soundcloud] Restore previews extraction (#23739)
|
||||
|
||||
|
||||
version 2020.01.15
|
||||
|
||||
Extractors
|
||||
* [yourporn] Fix extraction (#21645, #22255, #23459)
|
||||
+ [canvas] Add support for new API endpoint (#17680, #18629)
|
||||
* [ndr:base:embed] Improve thumbnails extraction (#23731)
|
||||
+ [vodplatform] Add support for embed.kwikmotion.com domain
|
||||
+ [twitter] Add support for promo_video_website cards (#23711)
|
||||
* [orf:radio] Clean description and improve extraction
|
||||
* [orf:fm4] Fix extraction (#23599)
|
||||
* [safari] Fix kaltura session extraction (#23679, #23670)
|
||||
* [lego] Fix extraction and extract subtitle (#23687)
|
||||
* [cloudflarestream] Improve extraction
|
||||
+ Add support for bytehighway.net domain
|
||||
+ Add support for signed URLs
|
||||
+ Extract thumbnail
|
||||
* [naver] Improve extraction
|
||||
* Improve geo-restriction handling
|
||||
+ Extract automatic captions
|
||||
+ Extract uploader metadata
|
||||
+ Extract VLive HLS formats
|
||||
* Improve metadata extraction
|
||||
- [pandatv] Remove extractor (#23630)
|
||||
* [dctp] Fix format extraction (#23656)
|
||||
+ [scrippsnetworks] Add support for www.discovery.com videos
|
||||
* [discovery] Fix anonymous token extraction (#23650)
|
||||
* [nrktv:seriebase] Fix extraction (#23625, #23537)
|
||||
* [wistia] Improve format extraction and extract subtitles (#22590)
|
||||
* [vice] Improve extraction (#23631)
|
||||
* [redtube] Detect private videos (#23518)
|
||||
|
||||
|
||||
version 2020.01.01
|
||||
|
||||
Extractors
|
||||
@@ -65,7 +543,7 @@ Extractors
|
||||
* [abcotvs] Relax URL regular expression and improve metadata extraction
|
||||
(#18014)
|
||||
* [channel9] Reduce response size
|
||||
* [adobetv] Improve extaction
|
||||
* [adobetv] Improve extraction
|
||||
* Use OnDemandPagedList for list extractors
|
||||
* Reduce show extraction requests
|
||||
* Extract original video format and subtitles
|
||||
@@ -90,7 +568,7 @@ Extractors
|
||||
* [dailymotion] Improve extraction
|
||||
* Extract http formats included in m3u8 manifest
|
||||
* Fix user extraction (#3553, #21415)
|
||||
+ Add suport for User Authentication (#11491)
|
||||
+ Add support for User Authentication (#11491)
|
||||
* Fix password protected videos extraction (#23176)
|
||||
* Respect age limit option and family filter cookie value (#18437)
|
||||
* Handle video url playlist query param
|
||||
@@ -175,7 +653,7 @@ Extractors
|
||||
- [go90] Remove extractor
|
||||
* [kakao] Remove raw request
|
||||
+ [kakao] Extract format total bitrate
|
||||
* [daum] Fix VOD and Clip extracton (#15015)
|
||||
* [daum] Fix VOD and Clip extraction (#15015)
|
||||
* [kakao] Improve extraction
|
||||
+ Add support for embed URLs
|
||||
+ Add support for Kakao Legacy vid based embed URLs
|
||||
@@ -219,7 +697,7 @@ Extractors
|
||||
* Improve format extraction (#22123)
|
||||
+ Extract uploader_id and uploader_url (#21916)
|
||||
+ Extract all known thumbnails (#19071, #20659)
|
||||
* Fix extration for private playlists (#20976)
|
||||
* Fix extraction for private playlists (#20976)
|
||||
+ Add support for playlist embeds (#20976)
|
||||
* Skip preview formats (#22806)
|
||||
* [dplay] Improve extraction
|
||||
@@ -694,7 +1172,7 @@ Extractors
|
||||
* [hbo] Fix extraction and extract subtitles (#14629, #13709)
|
||||
* [youtube] Extract srv[1-3] subtitle formats (#20566)
|
||||
* [adultswim] Fix extraction (#18025)
|
||||
* [teamcoco] Fix extraction and add suport for subdomains (#17099, #20339)
|
||||
* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
|
||||
* [adn] Fix subtitle compatibility with ffmpeg
|
||||
* [adn] Fix extraction and add support for positioning styles (#20549)
|
||||
* [vk] Use unique video id (#17848)
|
||||
@@ -1106,7 +1584,7 @@ version 2018.11.18
|
||||
|
||||
Extractors
|
||||
+ [wwe] Extract subtitles
|
||||
+ [wwe] Add support for playlistst (#14781)
|
||||
+ [wwe] Add support for playlists (#14781)
|
||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||
* [vk] Detect geo restriction (#17767)
|
||||
* [openload] Use original host during extraction (#18211)
|
||||
@@ -2139,7 +2617,7 @@ Extractors
|
||||
* [youku] Update ccode (#14872)
|
||||
* [mnet] Fix format extraction (#14883)
|
||||
+ [xiami] Add Referer header to API request
|
||||
* [mtv] Correct scc extention in extracted subtitles (#13730)
|
||||
* [mtv] Correct scc extension in extracted subtitles (#13730)
|
||||
* [vvvvid] Fix extraction for kenc videos (#13406)
|
||||
+ [br] Add support for BR Mediathek videos (#14560, #14788)
|
||||
+ [daisuki] Add support for motto.daisuki.com (#14681)
|
||||
@@ -2160,7 +2638,7 @@ Extractors
|
||||
* [nexx] Extract more formats
|
||||
+ [openload] Add support for openload.link (#14763)
|
||||
* [empflix] Relax URL regular expression
|
||||
* [empflix] Fix extractrion
|
||||
* [empflix] Fix extraction
|
||||
* [tnaflix] Don't modify download URLs (#14811)
|
||||
- [gamersyde] Remove extractor
|
||||
* [francetv:generationwhat] Fix extraction
|
||||
@@ -2355,7 +2833,7 @@ Extractors
|
||||
* [yahoo] Bypass geo restriction for brightcove (#14210)
|
||||
* [yahoo] Use extracted brightcove account id (#14210)
|
||||
* [rtve:alacarta] Fix extraction (#14290)
|
||||
+ [yahoo] Add support for custom brigthcove embeds (#14210)
|
||||
+ [yahoo] Add support for custom brightcove embeds (#14210)
|
||||
+ [generic] Add support for Video.js embeds
|
||||
+ [gfycat] Add support for /gifs/detail URLs (#14322)
|
||||
* [generic] Fix infinite recursion for twitter:player URLs (#14339)
|
||||
@@ -2600,7 +3078,7 @@ Extractors
|
||||
* [amcnetworks] Make rating optional (#12453)
|
||||
* [cloudy] Fix extraction (#13737)
|
||||
+ [nickru] Add support for nickelodeon.ru
|
||||
* [mtv] Improve thumbnal extraction
|
||||
* [mtv] Improve thumbnail extraction
|
||||
* [nick] Automate geo-restriction bypass (#13711)
|
||||
* [niconico] Improve error reporting (#13696)
|
||||
|
||||
@@ -2964,7 +3442,7 @@ Extractors
|
||||
+ [cda] Support birthday verification (#12789)
|
||||
* [leeco] Fix extraction (#12974)
|
||||
+ [pbs] Extract chapters
|
||||
* [amp] Imporove thumbnail and subtitles extraction
|
||||
* [amp] Improve thumbnail and subtitles extraction
|
||||
* [foxsports] Fix extraction (#12945)
|
||||
- [coub] Remove comment count extraction (#12941)
|
||||
|
||||
@@ -3134,7 +3612,7 @@ Extractors
|
||||
+ [rbmaradio] Add support for redbullradio.com URLs (#12687)
|
||||
+ [npo:live] Add support for default URL (#12555)
|
||||
* [mixcloud:playlist] Fix title, description and view count extraction (#12582)
|
||||
+ [thesun] Add suport for thesun.co.uk (#11298, #12674)
|
||||
+ [thesun] Add support for thesun.co.uk (#11298, #12674)
|
||||
+ [ceskateleveize:porady] Add support for porady (#7411, #12645)
|
||||
* [ceskateleveize] Improve extraction and remove URL replacement hacks
|
||||
+ [kaltura] Add support for iframe embeds (#12679)
|
||||
@@ -3173,7 +3651,7 @@ Extractors
|
||||
* [funimation] Fix extraction (#10696, #11773)
|
||||
+ [xfileshare] Add support for vidabc.com (#12589)
|
||||
+ [xfileshare] Improve extraction and extract hls formats
|
||||
+ [crunchyroll] Pass geo verifcation proxy
|
||||
+ [crunchyroll] Pass geo verification proxy
|
||||
+ [cwtv] Extract ISM formats
|
||||
+ [tvplay] Bypass geo restriction
|
||||
+ [vrv] Add support for vrv.co
|
||||
@@ -3237,7 +3715,7 @@ Extractors
|
||||
+ [bostonglobe] Add extractor for bostonglobe.com (#12099)
|
||||
+ [toongoggles] Add support for toongoggles.com (#12171)
|
||||
+ [medialaan] Add support for Medialaan sites (#9974, #11912)
|
||||
+ [discoverynetworks] Add support for more domains and bypass geo restiction
|
||||
+ [discoverynetworks] Add support for more domains and bypass geo restriction
|
||||
* [openload] Fix extraction (#10408)
|
||||
|
||||
|
||||
@@ -4827,7 +5305,7 @@ version 2016.07.09.1
|
||||
Fixed/improved extractors
|
||||
- youtube
|
||||
- ard
|
||||
- srmediatek (#9373)
|
||||
- srmediathek (#9373)
|
||||
|
||||
|
||||
version 2016.07.09
|
||||
@@ -4891,7 +5369,7 @@ Fixed/improved extractors
|
||||
- kaltura (#5557)
|
||||
- la7
|
||||
- Changed features
|
||||
- Rename --cn-verfication-proxy to --geo-verification-proxy
|
||||
- Rename --cn-verification-proxy to --geo-verification-proxy
|
||||
Miscellaneous
|
||||
- Add script for displaying downloads statistics
|
||||
|
||||
|
14
README.md
14
README.md
@@ -434,9 +434,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
--exec CMD Execute a command on the file after
|
||||
downloading, similar to find's -exec
|
||||
syntax. Example: --exec 'adb push {}
|
||||
/sdcard/Music/ && rm {}'
|
||||
downloading and post-processing, similar to
|
||||
find's -exec syntax. Example: --exec 'adb
|
||||
push {} /sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT Convert the subtitles to other format
|
||||
(currently supported: srt|ass|vtt|lrc)
|
||||
|
||||
@@ -545,7 +545,7 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `extractor` (string): Name of the extractor
|
||||
- `extractor_key` (string): Key name of the extractor
|
||||
- `epoch` (numeric): Unix epoch when creating the file
|
||||
- `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
|
||||
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
|
||||
- `playlist` (string): Name or id of the playlist that contains the video
|
||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
||||
- `playlist_id` (string): Playlist identifier
|
||||
@@ -835,7 +835,9 @@ In February 2015, the new YouTube player contained a character sequence in a str
|
||||
|
||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||
These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block meaning that you can gain access again after solving CAPTCHA. Just open a browser and solve a CAPTCHA the service suggests you and after that [pass cookies](#how-do-i-pass-cookies-to-youtube-dl) to youtube-dl. Note that if your machine has multiple external IPs then you should also pass exactly the same IP you've used for solving CAPTCHA with [`--source-address`](#network-options). Also you may need to pass a `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds).
|
||||
|
||||
If this is not the case (no CAPTCHA suggested to solve by the service) then you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||
|
||||
### SyntaxError: Non-ASCII character
|
||||
|
||||
@@ -1030,7 +1032,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
||||
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
|
||||
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
|
||||
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 youtube_dl/extractor/yourextractor.py
|
||||
|
||||
|
@@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import mimetypes
|
||||
|
@@ -61,7 +61,7 @@ def build_lazy_ie(ie, name):
|
||||
return s
|
||||
|
||||
|
||||
# find the correct sorting and add the required base classes so that sublcasses
|
||||
# find the correct sorting and add the required base classes so that subclasses
|
||||
# can be correctly created
|
||||
classes = _ALL_CLASSES[:-1]
|
||||
ordered_cls = []
|
||||
|
@@ -41,6 +41,7 @@
|
||||
- **AlJazeera**
|
||||
- **Allocine**
|
||||
- **AlphaPorno**
|
||||
- **Amara**
|
||||
- **AMCNetworks**
|
||||
- **AmericasTestKitchen**
|
||||
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
@@ -58,9 +59,10 @@
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:playlist**
|
||||
- **arte.sky.it**
|
||||
- **ArteTV**
|
||||
- **ArteTVEmbed**
|
||||
- **ArteTVPlaylist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
- **AtresPlayer**
|
||||
@@ -98,6 +100,7 @@
|
||||
- **BiliBili**
|
||||
- **BilibiliAudio**
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BiliBiliPlayer**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
@@ -108,6 +111,7 @@
|
||||
- **Bloomberg**
|
||||
- **BokeCC**
|
||||
- **BostonGlobe**
|
||||
- **Box**
|
||||
- **Bpb**: Bundeszentrale für politische Bildung
|
||||
- **BR**: Bayerischer Rundfunk
|
||||
- **BravoTV**
|
||||
@@ -155,6 +159,7 @@
|
||||
- **Chilloutzone**
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **cielotv.it**
|
||||
- **Cinchcast**
|
||||
- **Cinemax**
|
||||
- **CiscoLiveSearch**
|
||||
@@ -389,7 +394,6 @@
|
||||
- **JeuxVideo**
|
||||
- **Joj**
|
||||
- **Jove**
|
||||
- **jpopsuki.tv**
|
||||
- **JWPlatform**
|
||||
- **Kakao**
|
||||
- **Kaltura**
|
||||
@@ -417,6 +421,7 @@
|
||||
- **la7.it**
|
||||
- **laola1tv**
|
||||
- **laola1tv:embed**
|
||||
- **lbry.tv**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
@@ -497,6 +502,7 @@
|
||||
- **MNetTV**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
- **MofosexEmbed**
|
||||
- **Mojvideo**
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
@@ -605,6 +611,7 @@
|
||||
- **Nuvid**
|
||||
- **NYTimes**
|
||||
- **NYTimesArticle**
|
||||
- **NYTimesCooking**
|
||||
- **NZZ**
|
||||
- **ocw.mit.edu**
|
||||
- **OdaTV**
|
||||
@@ -619,16 +626,25 @@
|
||||
- **Ooyala**
|
||||
- **OoyalaExternal**
|
||||
- **OraTV**
|
||||
- **orf:burgenland**: Radio Burgenland
|
||||
- **orf:fm4**: radio FM4
|
||||
- **orf:fm4:story**: fm4.orf.at stories
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:kaernten**: Radio Kärnten
|
||||
- **orf:noe**: Radio Niederösterreich
|
||||
- **orf:oberoesterreich**: Radio Oberösterreich
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:oe3**: Radio Österreich 3
|
||||
- **orf:salzburg**: Radio Salzburg
|
||||
- **orf:steiermark**: Radio Steiermark
|
||||
- **orf:tirol**: Radio Tirol
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **orf:vorarlberg**: Radio Vorarlberg
|
||||
- **orf:wien**: Radio Wien
|
||||
- **OsnatelTV**
|
||||
- **OutsideTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
@@ -648,10 +664,13 @@
|
||||
- **PicartoVod**
|
||||
- **Piksel**
|
||||
- **Pinkbike**
|
||||
- **Pinterest**
|
||||
- **PinterestCollection**
|
||||
- **Pladform**
|
||||
- **Platzi**
|
||||
- **PlatziCourse**
|
||||
- **play.fm**
|
||||
- **player.sky.it**
|
||||
- **PlayPlusTV**
|
||||
- **PlaysTV**
|
||||
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
|
||||
@@ -664,6 +683,7 @@
|
||||
- **Pokemon**
|
||||
- **PolskieRadio**
|
||||
- **PolskieRadioCategory**
|
||||
- **Popcorntimes**
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
@@ -706,6 +726,8 @@
|
||||
- **RayWenderlichCourse**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBull**
|
||||
- **RedBullEmbed**
|
||||
- **RedBullTV**
|
||||
- **RedBullTVRrnContent**
|
||||
- **Reddit**
|
||||
@@ -740,6 +762,7 @@
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **RUHD**
|
||||
- **RumbleEmbed**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
- **rutube:embed**: Rutube embedded videos
|
||||
@@ -777,6 +800,8 @@
|
||||
- **Shared**: shared.sx
|
||||
- **ShowRoomLive**
|
||||
- **Sina**
|
||||
- **sky.it**
|
||||
- **skyacademy.it**
|
||||
- **SkylineWebcams**
|
||||
- **SkyNews**
|
||||
- **skynewsarabia:article**
|
||||
@@ -810,8 +835,6 @@
|
||||
- **SpankBangPlaylist**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
- **Spiegeltv**
|
||||
- **sport.francetvinfo.fr**
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
@@ -917,6 +940,7 @@
|
||||
- **TV2DKBornholmPlay**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **tv8.it**
|
||||
- **TVA**
|
||||
- **TVANouvelles**
|
||||
- **TVANouvellesArticle**
|
||||
@@ -939,16 +963,13 @@
|
||||
- **TVPlayHome**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:profile**
|
||||
- **twitch:stream**
|
||||
- **twitch:video**
|
||||
- **twitch:videos:all**
|
||||
- **twitch:videos:highlights**
|
||||
- **twitch:videos:past-broadcasts**
|
||||
- **twitch:videos:uploads**
|
||||
- **twitch:vod**
|
||||
- **TwitchCollection**
|
||||
- **TwitchVideos**
|
||||
- **TwitchVideosClips**
|
||||
- **TwitchVideosCollections**
|
||||
- **twitter**
|
||||
- **twitter:amplify**
|
||||
- **twitter:broadcast**
|
||||
@@ -991,6 +1012,8 @@
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.sky.it**
|
||||
- **video.sky.it:live**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videomore**
|
||||
@@ -1005,8 +1028,8 @@
|
||||
- **Vidzi**
|
||||
- **vier**: vier.be and vijf.be
|
||||
- **vier:videos**
|
||||
- **ViewLift**
|
||||
- **ViewLiftEmbed**
|
||||
- **viewlift**
|
||||
- **viewlift:embed**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
- **viki:channel**
|
||||
@@ -1032,7 +1055,6 @@
|
||||
- **vk:wallpost**
|
||||
- **vlive**
|
||||
- **vlive:channel**
|
||||
- **vlive:playlist**
|
||||
- **Vodlocker**
|
||||
- **VODPl**
|
||||
- **VODPlatform**
|
||||
@@ -1121,20 +1143,16 @@
|
||||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
- **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
|
||||
- **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
|
||||
- **youtube:live**: YouTube.com live streams
|
||||
- **youtube:playlist**: YouTube.com playlists
|
||||
- **youtube:playlists**: YouTube.com user/channel playlists
|
||||
- **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
|
||||
- **youtube:search**: YouTube.com searches
|
||||
- **youtube:search:date**: YouTube.com searches, newest videos first
|
||||
- **youtube:search_url**: YouTube.com search URLs
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:tab**: YouTube.com tab
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **YoutubeYtUser**
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **Zattoo**
|
||||
|
@@ -37,7 +37,7 @@
|
||||
"writeinfojson": true,
|
||||
"writesubtitles": false,
|
||||
"allsubtitles": false,
|
||||
"listssubtitles": false,
|
||||
"listsubtitles": false,
|
||||
"socket_timeout": 20,
|
||||
"fixup": "never"
|
||||
}
|
||||
|
@@ -816,11 +816,15 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
'webpage_url': 'http://example.com',
|
||||
}
|
||||
|
||||
def get_ids(params):
|
||||
def get_downloaded_info_dicts(params):
|
||||
ydl = YDL(params)
|
||||
# make a copy because the dictionary can be modified
|
||||
ydl.process_ie_result(playlist.copy())
|
||||
return [int(v['id']) for v in ydl.downloaded_info_dicts]
|
||||
# make a deep copy because the dictionary and nested entries
|
||||
# can be modified
|
||||
ydl.process_ie_result(copy.deepcopy(playlist))
|
||||
return ydl.downloaded_info_dicts
|
||||
|
||||
def get_ids(params):
|
||||
return [int(v['id']) for v in get_downloaded_info_dicts(params)]
|
||||
|
||||
result = get_ids({})
|
||||
self.assertEqual(result, [1, 2, 3, 4])
|
||||
@@ -852,6 +856,22 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
||||
self.assertEqual(result, [2, 3, 4])
|
||||
|
||||
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
|
||||
# @{
|
||||
result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
|
||||
self.assertEqual(result[0]['playlist_index'], 2)
|
||||
self.assertEqual(result[1]['playlist_index'], 3)
|
||||
|
||||
result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
|
||||
self.assertEqual(result[0]['playlist_index'], 2)
|
||||
self.assertEqual(result[1]['playlist_index'], 3)
|
||||
self.assertEqual(result[2]['playlist_index'], 4)
|
||||
|
||||
result = get_downloaded_info_dicts({'playlist_items': '4,2'})
|
||||
self.assertEqual(result[0]['playlist_index'], 4)
|
||||
self.assertEqual(result[1]['playlist_index'], 2)
|
||||
# @}
|
||||
|
||||
def test_urlopen_no_file_protocol(self):
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||
ydl = YDL()
|
||||
@@ -899,6 +919,76 @@ class TestYoutubeDL(unittest.TestCase):
|
||||
self.assertEqual(downloaded['extractor'], 'testex')
|
||||
self.assertEqual(downloaded['extractor_key'], 'TestEx')
|
||||
|
||||
# Test case for https://github.com/ytdl-org/youtube-dl/issues/27064
|
||||
def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self):
|
||||
|
||||
class _YDL(YDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(_YDL, self).__init__(*args, **kwargs)
|
||||
|
||||
def trouble(self, s, tb=None):
|
||||
pass
|
||||
|
||||
ydl = _YDL({
|
||||
'format': 'extra',
|
||||
'ignoreerrors': True,
|
||||
})
|
||||
|
||||
class VideoIE(InfoExtractor):
|
||||
_VALID_URL = r'video:(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
formats = [{
|
||||
'format_id': 'default',
|
||||
'url': 'url:',
|
||||
}]
|
||||
if video_id == '0':
|
||||
raise ExtractorError('foo')
|
||||
if video_id == '2':
|
||||
formats.append({
|
||||
'format_id': 'extra',
|
||||
'url': TEST_URL,
|
||||
})
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': 'Video %s' % video_id,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
class PlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'playlist:'
|
||||
|
||||
def _entries(self):
|
||||
for n in range(3):
|
||||
video_id = compat_str(n)
|
||||
yield {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': VideoIE.ie_key(),
|
||||
'id': video_id,
|
||||
'url': 'video:%s' % video_id,
|
||||
'title': 'Video Transparent %s' % video_id,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.playlist_result(self._entries())
|
||||
|
||||
ydl.add_info_extractor(VideoIE(ydl))
|
||||
ydl.add_info_extractor(PlaylistIE(ydl))
|
||||
info = ydl.extract_info('playlist:')
|
||||
entries = info['entries']
|
||||
self.assertEqual(len(entries), 3)
|
||||
self.assertTrue(entries[0] is None)
|
||||
self.assertTrue(entries[1] is None)
|
||||
self.assertEqual(len(ydl.downloaded_info_dicts), 1)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(entries[2], downloaded)
|
||||
self.assertEqual(downloaded['url'], TEST_URL)
|
||||
self.assertEqual(downloaded['title'], 'Video Transparent 2')
|
||||
self.assertEqual(downloaded['id'], '2')
|
||||
self.assertEqual(downloaded['extractor'], 'Video')
|
||||
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||
assert_cookie_has_value('HTTPONLY_COOKIE')
|
||||
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
||||
|
||||
def test_malformed_cookies(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
# Cookies should be empty since all malformed cookie file entries
|
||||
# will be ignored
|
||||
self.assertFalse(cookiejar._cookies)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -31,16 +31,17 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
|
||||
def test_youtube_playlist_matching(self):
|
||||
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||
assertTab = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||
assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585
|
||||
assertPlaylist('PL63F0C78739B09958')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||
assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||
assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668
|
||||
self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
|
||||
# Top tracks
|
||||
assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
assertTab('https://www.youtube.com/playlist?list=MCUS.20142101')
|
||||
|
||||
def test_youtube_matching(self):
|
||||
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M'))
|
||||
@@ -51,26 +52,23 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube'])
|
||||
|
||||
def test_youtube_channel_matching(self):
|
||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
||||
assertChannel = lambda url: self.assertMatch(url, ['youtube:tab'])
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||
|
||||
def test_youtube_user_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
# def test_youtube_user_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
||||
self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
|
||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
|
||||
|
||||
def test_youtube_show_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
||||
|
||||
def test_youtube_search_matching(self):
|
||||
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
# def test_youtube_search_matching(self):
|
||||
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||
|
||||
def test_youtube_extract(self):
|
||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||
|
@@ -26,7 +26,6 @@ from youtube_dl.extractor import (
|
||||
ThePlatformIE,
|
||||
ThePlatformFeedIE,
|
||||
RTVEALaCartaIE,
|
||||
FunnyOrDieIE,
|
||||
DemocracynowIE,
|
||||
)
|
||||
|
||||
@@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||
|
||||
|
||||
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
|
||||
IE = FunnyOrDieIE
|
||||
|
||||
def test_allsubtitles(self):
|
||||
self.DL.params['writesubtitles'] = True
|
||||
self.DL.params['allsubtitles'] = True
|
||||
subtitles = self.getSubtitles()
|
||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
||||
|
||||
|
||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||
IE = DemocracynowIE
|
||||
|
@@ -803,6 +803,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(mimetype2ext('text/vtt'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt')
|
||||
self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html')
|
||||
self.assertEqual(mimetype2ext('audio/x-wav'), 'wav')
|
||||
self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav')
|
||||
|
||||
def test_month_by_name(self):
|
||||
self.assertEqual(month_by_name(None), None)
|
||||
@@ -935,6 +937,28 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(d['x'], 1)
|
||||
self.assertEqual(d['y'], 'a')
|
||||
|
||||
# Just drop ! prefix for now though this results in a wrong value
|
||||
on = js_to_json('''{
|
||||
a: !0,
|
||||
b: !1,
|
||||
c: !!0,
|
||||
d: !!42.42,
|
||||
e: !!![],
|
||||
f: !"abc",
|
||||
g: !"",
|
||||
!42: 42
|
||||
}''')
|
||||
self.assertEqual(json.loads(on), {
|
||||
'a': 0,
|
||||
'b': 1,
|
||||
'c': 0,
|
||||
'd': 42.42,
|
||||
'e': [],
|
||||
'f': "abc",
|
||||
'g': "",
|
||||
'42': 42
|
||||
})
|
||||
|
||||
on = js_to_json('["abc", "def",]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
@@ -992,6 +1016,12 @@ class TestUtil(unittest.TestCase):
|
||||
on = js_to_json('{42:4.2e1}')
|
||||
self.assertEqual(json.loads(on), {'42': 42.0})
|
||||
|
||||
on = js_to_json('{ "0x40": "0x40" }')
|
||||
self.assertEqual(json.loads(on), {'0x40': '0x40'})
|
||||
|
||||
on = js_to_json('{ "040": "040" }')
|
||||
self.assertEqual(json.loads(on), {'040': '040'})
|
||||
|
||||
def test_js_to_json_malformed(self):
|
||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
|
@@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase):
|
||||
for description, duration, expected_chapters in self._TEST_CASES:
|
||||
ie = YoutubeIE()
|
||||
expect_value(
|
||||
self, ie._extract_chapters(description, duration),
|
||||
self, ie._extract_chapters_from_description(description, duration),
|
||||
expected_chapters, None)
|
||||
|
||||
|
||||
|
@@ -74,6 +74,28 @@ _TESTS = [
|
||||
]
|
||||
|
||||
|
||||
class TestPlayerInfo(unittest.TestCase):
|
||||
def test_youtube_extract_player_info(self):
|
||||
PLAYER_URLS = (
|
||||
('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
|
||||
# obsolete
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
|
||||
('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
|
||||
('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
|
||||
('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
|
||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
|
||||
('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
|
||||
('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
|
||||
('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
|
||||
)
|
||||
for player_url, expected_player_id in PLAYER_URLS:
|
||||
expected_player_type = player_url.split('.')[-1]
|
||||
player_type, player_id = YoutubeIE._extract_player_info(player_url)
|
||||
self.assertEqual(player_type, expected_player_type)
|
||||
self.assertEqual(player_id, expected_player_id)
|
||||
|
||||
|
||||
class TestSignature(unittest.TestCase):
|
||||
def setUp(self):
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
9
test/testdata/cookies/malformed_cookies.txt
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
# Cookie file entry with invalid number of fields - 6 instead of 7
|
||||
www.foobar.foobar FALSE / FALSE 0 COOKIE
|
||||
|
||||
# Cookie file entry with invalid expires at
|
||||
www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
|
@@ -92,6 +92,7 @@ from .utils import (
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
YoutubeDLRedirectHandler,
|
||||
)
|
||||
from .cache import Cache
|
||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||
@@ -792,21 +793,14 @@ class YoutubeDL(object):
|
||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||
'and will probably not work.')
|
||||
|
||||
return self.__extract_info(url, ie, download, extra_info, process)
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
|
||||
def __handle_extraction_exceptions(func):
|
||||
def wrapper(self, *args, **kwargs):
|
||||
try:
|
||||
ie_result = ie.extract(url)
|
||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||
break
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
ie_result = {
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
self.add_default_extra_info(ie_result, ie, url)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
return ie_result
|
||||
return func(self, *args, **kwargs)
|
||||
except GeoRestrictedError as e:
|
||||
msg = e.msg
|
||||
if e.countries:
|
||||
@@ -814,20 +808,33 @@ class YoutubeDL(object):
|
||||
map(ISO3166Utils.short2full, e.countries))
|
||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||
self.report_error(msg)
|
||||
break
|
||||
except ExtractorError as e: # An error we somewhat expected
|
||||
self.report_error(compat_str(e), e.format_traceback())
|
||||
break
|
||||
except MaxDownloadsReached:
|
||||
raise
|
||||
except Exception as e:
|
||||
if self.params.get('ignoreerrors', False):
|
||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||
break
|
||||
else:
|
||||
raise
|
||||
return wrapper
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __extract_info(self, url, ie, download, extra_info, process):
|
||||
ie_result = ie.extract(url)
|
||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||
return
|
||||
if isinstance(ie_result, list):
|
||||
# Backwards compatibility: old IE result format
|
||||
ie_result = {
|
||||
'_type': 'compat_list',
|
||||
'entries': ie_result,
|
||||
}
|
||||
self.add_default_extra_info(ie_result, ie, url)
|
||||
if process:
|
||||
return self.process_ie_result(ie_result, download, extra_info)
|
||||
else:
|
||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||
return ie_result
|
||||
|
||||
def add_default_extra_info(self, ie_result, ie, url):
|
||||
self.add_extra_info(ie_result, {
|
||||
@@ -990,7 +997,7 @@ class YoutubeDL(object):
|
||||
'playlist_title': ie_result.get('title'),
|
||||
'playlist_uploader': ie_result.get('uploader'),
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_index': i + playliststart,
|
||||
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||
'extractor': ie_result['extractor'],
|
||||
'webpage_url': ie_result['webpage_url'],
|
||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||
@@ -1002,9 +1009,8 @@ class YoutubeDL(object):
|
||||
self.to_screen('[download] ' + reason)
|
||||
continue
|
||||
|
||||
entry_result = self.process_ie_result(entry,
|
||||
download=download,
|
||||
extra_info=extra)
|
||||
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||
# TODO: skip failed (empty) entries?
|
||||
playlist_results.append(entry_result)
|
||||
ie_result['entries'] = playlist_results
|
||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||
@@ -1033,6 +1039,11 @@ class YoutubeDL(object):
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
|
||||
@__handle_extraction_exceptions
|
||||
def __process_iterable_entry(self, entry, download, extra_info):
|
||||
return self.process_ie_result(
|
||||
entry, download=download, extra_info=extra_info)
|
||||
|
||||
def _build_format_filter(self, filter_spec):
|
||||
" Returns a function to filter the formats according to the filter_spec "
|
||||
|
||||
@@ -2343,6 +2354,7 @@ class YoutubeDL(object):
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
redirect_handler = YoutubeDLRedirectHandler()
|
||||
data_handler = compat_urllib_request_DataHandler()
|
||||
|
||||
# When passing our own FileHandler instance, build_opener won't add the
|
||||
@@ -2356,7 +2368,7 @@ class YoutubeDL(object):
|
||||
file_handler.file_open = file_open
|
||||
|
||||
opener = compat_urllib_request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
|
@@ -57,6 +57,17 @@ try:
|
||||
except ImportError: # Python 2
|
||||
import cookielib as compat_cookiejar
|
||||
|
||||
if sys.version_info[0] == 2:
|
||||
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
||||
def __init__(self, version, name, value, *args, **kwargs):
|
||||
if isinstance(name, compat_str):
|
||||
name = name.encode()
|
||||
if isinstance(value, compat_str):
|
||||
value = value.encode()
|
||||
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
||||
else:
|
||||
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||
|
||||
try:
|
||||
import http.cookies as compat_cookies
|
||||
except ImportError: # Python 2
|
||||
@@ -2334,7 +2345,7 @@ except ImportError: # Python <3.4
|
||||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
# Python 3.5. Introducing dummy exception for Python >3.5 for compatible
|
||||
# and uniform cross-version exceptiong handling
|
||||
# and uniform cross-version exception handling
|
||||
class compat_HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
@@ -2754,6 +2765,17 @@ else:
|
||||
compat_expanduser = os.path.expanduser
|
||||
|
||||
|
||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
# os.path.realpath on Windows does not follow symbolic links
|
||||
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||
def compat_realpath(path):
|
||||
while os.path.islink(path):
|
||||
path = os.path.abspath(os.readlink(path))
|
||||
return path
|
||||
else:
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
def compat_print(s):
|
||||
from .utils import preferredencoding
|
||||
@@ -2976,6 +2998,7 @@ __all__ = [
|
||||
'compat_basestring',
|
||||
'compat_chr',
|
||||
'compat_cookiejar',
|
||||
'compat_cookiejar_Cookie',
|
||||
'compat_cookies',
|
||||
'compat_ctypes_WINFUNCTYPE',
|
||||
'compat_etree_Element',
|
||||
@@ -2998,6 +3021,7 @@ __all__ = [
|
||||
'compat_os_name',
|
||||
'compat_parse_qs',
|
||||
'compat_print',
|
||||
'compat_realpath',
|
||||
'compat_setenv',
|
||||
'compat_shlex_quote',
|
||||
'compat_shlex_split',
|
||||
|
@@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
|
||||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||
success = ctx['dl'].download(fragment_filename, {
|
||||
fragment_info_dict = {
|
||||
'url': frag_url,
|
||||
'http_headers': headers or info_dict.get('http_headers'),
|
||||
})
|
||||
}
|
||||
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||
if not success:
|
||||
return False, None
|
||||
if fragment_info_dict.get('filetime'):
|
||||
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||
frag_content = down.read()
|
||||
@@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
|
||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||
else:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
if self.params.get('updatetime', True):
|
||||
filetime = ctx.get('fragment_filetime')
|
||||
if filetime:
|
||||
try:
|
||||
os.utime(ctx['filename'], (time.time(), filetime))
|
||||
except Exception:
|
||||
pass
|
||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
||||
self._hook_progress({
|
||||
|
@@ -141,7 +141,7 @@ class HlsFD(FragmentFD):
|
||||
count = 0
|
||||
headers = info_dict.get('http_headers', {})
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success, frag_content = self._download_fragment(
|
||||
|
@@ -106,7 +106,14 @@ class HttpFD(FileDownloader):
|
||||
set_range(request, range_start, range_end)
|
||||
# Establish connection
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
except (compat_urllib_error.URLError, ) as err:
|
||||
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||
reason = getattr(err, 'reason', None)
|
||||
if isinstance(reason, socket.timeout):
|
||||
raise RetryDownload(err)
|
||||
raise err
|
||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||
# that don't support resuming and serve a whole file with no Content-Range
|
||||
@@ -218,24 +225,27 @@ class HttpFD(FileDownloader):
|
||||
|
||||
def retry(e):
|
||||
to_stdout = ctx.tmpfilename == '-'
|
||||
if not to_stdout:
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
if ctx.stream is not None:
|
||||
if not to_stdout:
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
|
||||
# socket.timeout is a subclass of socket.error but may not have
|
||||
# errno set
|
||||
except socket.timeout as e:
|
||||
retry(e)
|
||||
except socket.error as e:
|
||||
if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
|
||||
raise
|
||||
retry(e)
|
||||
# SSLError on python 2 (inherits socket.error) may have
|
||||
# no errno set but this error message
|
||||
if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
|
||||
retry(e)
|
||||
raise
|
||||
|
||||
byte_counter += len(data_block)
|
||||
|
||||
@@ -299,7 +309,7 @@ class HttpFD(FileDownloader):
|
||||
'elapsed': now - ctx.start_time,
|
||||
})
|
||||
|
||||
if is_test and byte_counter == data_len:
|
||||
if data_len is not None and byte_counter == data_len:
|
||||
break
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
|
||||
|
@@ -110,17 +110,17 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||
'info_dict': {
|
||||
'id': 'ZX9371A050S00',
|
||||
'id': 'LE1927H001S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Gaston's Birthday",
|
||||
'series': "Ben And Holly's Little Kingdom",
|
||||
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
||||
'upload_date': '20180604',
|
||||
'uploader_id': 'abc4kids',
|
||||
'timestamp': 1528140219,
|
||||
'title': "Series 11 Ep 1",
|
||||
'series': "Gruen",
|
||||
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||
'upload_date': '20190925',
|
||||
'uploader_id': 'abc1',
|
||||
'timestamp': 1569445289,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -148,7 +148,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
'hdnea': token,
|
||||
})
|
||||
|
||||
for sd in ('sd', 'sd-low'):
|
||||
for sd in ('720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
if not sd_url:
|
||||
|
@@ -275,7 +275,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
|
||||
if video_element is None or video_element.text is None:
|
||||
raise ExtractorError(
|
||||
'Video %s video does not exist' % video_id, expected=True)
|
||||
'Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
video_url = video_element.text.strip()
|
||||
|
||||
|
103
youtube_dl/extractor/amara.py
Normal file
103
youtube_dl/extractor/amara.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AmaraIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# Youtube
|
||||
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||
'info_dict': {
|
||||
'id': 'h6ZuVdvYnfE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why jury trials are becoming less common',
|
||||
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'subtitles': dict,
|
||||
'upload_date': '20160813',
|
||||
'uploader': 'PBS NewsHour',
|
||||
'uploader_id': 'PBSNewsHour',
|
||||
'timestamp': 1549639570,
|
||||
}
|
||||
}, {
|
||||
# Vimeo
|
||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||
'info_dict': {
|
||||
'id': '18622084',
|
||||
'ext': 'mov',
|
||||
'title': 'Vimeo at CES 2011!',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'subtitles': dict,
|
||||
'timestamp': 1294763658,
|
||||
'upload_date': '20110111',
|
||||
'uploader': 'Sam Morrill',
|
||||
'uploader_id': 'sammorrill'
|
||||
}
|
||||
}, {
|
||||
# Direct Link
|
||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||
'info_dict': {
|
||||
'id': 's8KL7I3jLmh6',
|
||||
'ext': 'mp4',
|
||||
'title': 'The danger of a single story',
|
||||
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'subtitles': dict,
|
||||
'upload_date': '20091007',
|
||||
'timestamp': 1254942511,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
'https://amara.org/api/videos/%s/' % video_id,
|
||||
video_id, query={'format': 'json'})
|
||||
title = meta['title']
|
||||
video_url = meta['all_urls'][0]
|
||||
|
||||
subtitles = {}
|
||||
for language in (meta.get('languages') or []):
|
||||
subtitles_uri = language.get('subtitles_uri')
|
||||
if not (subtitles_uri and language.get('published')):
|
||||
continue
|
||||
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||
for f in ('json', 'srt', 'vtt'):
|
||||
subtitle.append({
|
||||
'ext': f,
|
||||
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||
})
|
||||
|
||||
info = {
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'title': title,
|
||||
'description': meta.get('description'),
|
||||
'thumbnail': meta.get('thumbnail'),
|
||||
'duration': int_or_none(meta.get('duration')),
|
||||
'timestamp': parse_iso8601(meta.get('created')),
|
||||
}
|
||||
|
||||
for ie in (YoutubeIE, VimeoIE):
|
||||
if ie.suitable(video_url):
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': ie.ie_key(),
|
||||
})
|
||||
break
|
||||
|
||||
return info
|
@@ -5,6 +5,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
@@ -13,22 +14,21 @@ from ..utils import (
|
||||
class AmericasTestKitchenIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
|
||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||
'info_dict': {
|
||||
'id': '1_5g5zua6e',
|
||||
'title': 'Summer Dinner Party',
|
||||
'id': '5b400b9ee338f922cb06450c',
|
||||
'title': 'Weeknight Japanese Suppers',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1497285541,
|
||||
'upload_date': '20170612',
|
||||
'uploader_id': 'roger.metcalf@americastestkitchen.com',
|
||||
'release_date': '20170617',
|
||||
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
||||
'thumbnail': r're:^https?://',
|
||||
'timestamp': 1523664000,
|
||||
'upload_date': '20180414',
|
||||
'release_date': '20180414',
|
||||
'series': "America's Test Kitchen",
|
||||
'season_number': 17,
|
||||
'episode': 'Summer Dinner Party',
|
||||
'episode_number': 24,
|
||||
'season_number': 18,
|
||||
'episode': 'Weeknight Japanese Suppers',
|
||||
'episode_number': 15,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -47,7 +47,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
webpage, 'initial context'),
|
||||
video_id)
|
||||
video_id, js_to_json)
|
||||
|
||||
ep_data = try_get(
|
||||
video_data,
|
||||
@@ -55,17 +55,7 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
|
||||
zype_id = ep_meta.get('zype_id')
|
||||
if zype_id:
|
||||
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||
ie_key = 'Zype'
|
||||
else:
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||
ie_key = 'Kaltura'
|
||||
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
@@ -79,8 +69,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'ie_key': ie_key,
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
||||
'ie_key': 'Zype',
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -22,7 +23,101 @@ from ..utils import (
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
class ARDMediathekBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
|
||||
|
||||
def _parse_media_info(self, media_info, video_id, fsk):
|
||||
formats = self._extract_formats(media_info, video_id)
|
||||
|
||||
if not formats:
|
||||
if fsk:
|
||||
raise ExtractorError(
|
||||
'This video is only available after 20:00', expected=True)
|
||||
elif media_info.get('_geoblocked'):
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available due to geoblocking',
|
||||
countries=self._GEO_COUNTRIES)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = media_info.get('_subtitleUrl')
|
||||
if subtitle_url:
|
||||
subtitles['de'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'duration': int_or_none(media_info.get('_duration')),
|
||||
'thumbnail': media_info.get('_previewImage'),
|
||||
'is_live': media_info.get('_isLive') is True,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
type_ = media_info.get('_type')
|
||||
media_array = media_info.get('_mediaArray', [])
|
||||
formats = []
|
||||
for num, media in enumerate(media_array):
|
||||
for stream in media.get('_mediaStreamArray', []):
|
||||
stream_urls = stream.get('_stream')
|
||||
if not stream_urls:
|
||||
continue
|
||||
if not isinstance(stream_urls, list):
|
||||
stream_urls = [stream_urls]
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
}
|
||||
m = re.search(
|
||||
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||
stream_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
if type_ == 'audio':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
return formats
|
||||
|
||||
|
||||
class ARDMediathekIE(ARDMediathekBaseIE):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
@@ -63,94 +158,6 @@ class ARDMediathekIE(InfoExtractor):
|
||||
def suitable(cls, url):
|
||||
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
|
||||
|
||||
def _extract_media_info(self, media_info_url, webpage, video_id):
|
||||
media_info = self._download_json(
|
||||
media_info_url, video_id, 'Downloading media JSON')
|
||||
|
||||
formats = self._extract_formats(media_info, video_id)
|
||||
|
||||
if not formats:
|
||||
if '"fsk"' in webpage:
|
||||
raise ExtractorError(
|
||||
'This video is only available after 20:00', expected=True)
|
||||
elif media_info.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geo restriction', expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(media_info.get('_duration'))
|
||||
thumbnail = media_info.get('_previewImage')
|
||||
is_live = media_info.get('_isLive') is True
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = media_info.get('_subtitleUrl')
|
||||
if subtitle_url:
|
||||
subtitles['de'] = [{
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_formats(self, media_info, video_id):
|
||||
type_ = media_info.get('_type')
|
||||
media_array = media_info.get('_mediaArray', [])
|
||||
formats = []
|
||||
for num, media in enumerate(media_array):
|
||||
for stream in media.get('_mediaStreamArray', []):
|
||||
stream_urls = stream.get('_stream')
|
||||
if not stream_urls:
|
||||
continue
|
||||
if not isinstance(stream_urls, list):
|
||||
stream_urls = [stream_urls]
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
continue
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
if server and server.startswith('rtmp'):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
}
|
||||
m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
|
||||
if m:
|
||||
f.update({
|
||||
'width': int(m.group('width')),
|
||||
'height': int(m.group('height')),
|
||||
})
|
||||
if type_ == 'audio':
|
||||
f['vcodec'] = 'none'
|
||||
formats.append(f)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
# determine video id from url
|
||||
m = re.match(self._VALID_URL, url)
|
||||
@@ -242,7 +249,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
|
||||
|
||||
class ARDIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
||||
_TESTS = [{
|
||||
# available till 14.02.2019
|
||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
||||
@@ -256,6 +263,9 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': '20180214',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||
'only_matching': True,
|
||||
@@ -302,21 +312,31 @@ class ARDIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
|
||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'title': 'Tatort: Die robuste Roswita',
|
||||
'id': '70153354',
|
||||
'title': 'Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
|
||||
'upload_date': '20180826',
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
||||
'timestamp': 1577047500,
|
||||
'upload_date': '20191222',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
|
||||
'only_matching': True,
|
||||
@@ -328,73 +348,75 @@ class ARDBetaMediathekIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
display_id = mobj.group('display_id')
|
||||
if display_id:
|
||||
display_id = display_id.rstrip('/')
|
||||
if not display_id:
|
||||
display_id = video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
|
||||
res = {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
player_page = self._download_json(
|
||||
'https://api.ardmediathek.de/public-gateway',
|
||||
display_id, data=json.dumps({
|
||||
'query': '''{
|
||||
playerPage(client:"%s", clipId: "%s") {
|
||||
blockedByFsk
|
||||
broadcastedOn
|
||||
maturityContentRating
|
||||
mediaCollection {
|
||||
_duration
|
||||
_geoblocked
|
||||
_isLive
|
||||
_mediaArray {
|
||||
_mediaStreamArray {
|
||||
_quality
|
||||
_server
|
||||
_stream
|
||||
}
|
||||
formats = []
|
||||
subtitles = {}
|
||||
geoblocked = False
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked') is True:
|
||||
geoblocked = True
|
||||
if '_duration' in widget:
|
||||
res['duration'] = int_or_none(widget['_duration'])
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
subtitle_url = url_or_none(widget.get('_subtitleUrl'))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('de', []).append({
|
||||
'ext': 'ttml',
|
||||
'url': subtitle_url,
|
||||
})
|
||||
if '_quality' in widget:
|
||||
format_url = url_or_none(try_get(
|
||||
widget, lambda x: x['_stream']['json'][0]))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
# HTTP formats are not available when geoblocked is True,
|
||||
# other formats are fine though
|
||||
if geoblocked:
|
||||
continue
|
||||
quality = str_or_none(widget.get('_quality'))
|
||||
formats.append({
|
||||
'format_id': ('http-' + quality) if quality else 'http',
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
if not formats and geoblocked:
|
||||
self.raise_geo_restricted(
|
||||
msg='This video is not available due to geoblocking',
|
||||
countries=['DE'])
|
||||
|
||||
self._sort_formats(formats)
|
||||
res.update({
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
_previewImage
|
||||
_subtitleUrl
|
||||
_type
|
||||
}
|
||||
show {
|
||||
title
|
||||
}
|
||||
synopsis
|
||||
title
|
||||
tracking {
|
||||
atiCustomVars {
|
||||
contentId
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (mobj.group('client'), video_id),
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json'
|
||||
})['data']['playerPage']
|
||||
title = player_page['title']
|
||||
content_id = str_or_none(try_get(
|
||||
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
|
||||
media_collection = player_page.get('mediaCollection') or {}
|
||||
if not media_collection and content_id:
|
||||
media_collection = self._download_json(
|
||||
'https://www.ardmediathek.de/play/media/' + content_id,
|
||||
content_id, fatal=False) or {}
|
||||
info = self._parse_media_info(
|
||||
media_collection, content_id or video_id,
|
||||
player_page.get('blockedByFsk'))
|
||||
age_limit = None
|
||||
description = player_page.get('synopsis')
|
||||
maturity_content_rating = player_page.get('maturityContentRating')
|
||||
if maturity_content_rating:
|
||||
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
|
||||
if not age_limit and description:
|
||||
age_limit = int_or_none(self._search_regex(
|
||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||
info.update({
|
||||
'age_limit': age_limit,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||
'series': try_get(player_page, lambda x: x['show']['title']),
|
||||
})
|
||||
|
||||
return res
|
||||
return info
|
||||
|
@@ -4,23 +4,57 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
# There are different sources of video in arte.tv, the extraction process
|
||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
||||
# add tests.
|
||||
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||
|
||||
|
||||
class ArteTVIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||
)
|
||||
/(?P<id>\d{6}-\d{3}-[AF])
|
||||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
|
||||
info = self._download_json(
|
||||
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
|
||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||
@@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
if not upload_date_str:
|
||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||
|
||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
||||
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||
subtitle = player_info.get('VSU', '').strip()
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
info_dict = {
|
||||
'id': player_info['VID'],
|
||||
'title': title,
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
}
|
||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||
|
||||
LANGS = {
|
||||
@@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
formats = []
|
||||
for format_id, format_dict in vsr.items():
|
||||
f = dict(format_dict)
|
||||
format_url = url_or_none(f.get('url'))
|
||||
streamer = f.get('streamer')
|
||||
if not format_url and not streamer:
|
||||
continue
|
||||
versionCode = f.get('versionCode')
|
||||
l = re.escape(langcode)
|
||||
|
||||
@@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
else:
|
||||
lang_pref = -1
|
||||
|
||||
media_type = f.get('mediaType')
|
||||
if media_type == 'hls':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format['language_preference'] = lang_pref
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
format = {
|
||||
'format_id': format_id,
|
||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||
@@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
'quality': qfunc(f.get('quality')),
|
||||
}
|
||||
|
||||
if f.get('mediaType') == 'rtmp':
|
||||
if media_type == 'rtmp':
|
||||
format['url'] = f['streamer']
|
||||
format['play_path'] = 'mp4:' + f['url']
|
||||
format['ext'] = 'flv'
|
||||
@@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
return {
|
||||
'id': player_info.get('VID') or video_id,
|
||||
'title': title,
|
||||
'description': player_info.get('VDE'),
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
||||
|
||||
class ArteTVEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'id': '100605-013-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
'title': 'United we Stream November Lockdown Edition #13',
|
||||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||
'upload_date': '20201116',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(
|
||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
||||
video_id, lang)
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
https://www\.arte\.tv
|
||||
/player/v3/index\.php\?json_url=
|
||||
(?P<json_url>
|
||||
https?://api\.arte\.tv/api/player/v1/config/
|
||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
json_url = qs['json_url'][0]
|
||||
video_id = ArteTVIE._match_id(json_url)
|
||||
return self.url_result(
|
||||
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'info_dict': {
|
||||
@@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
'%s/collectionData/%s/%s?source=videos'
|
||||
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||
entries = []
|
||||
for video in collection['videos']:
|
||||
if not isinstance(video, dict):
|
||||
continue
|
||||
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
video_id = video.get('programId')
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': video_url,
|
||||
'id': video_id,
|
||||
'title': video.get('title'),
|
||||
'alt_title': video.get('subtitle'),
|
||||
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||
'duration': int_or_none(video.get('durationSeconds')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'ie_key': ArteTVIE.ie_key(),
|
||||
})
|
||||
title = collection.get('title')
|
||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||
entries = [
|
||||
self._extract_from_json_url(
|
||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
||||
for video in collection['videos'] if video.get('jsonUrl')]
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
@@ -47,39 +47,19 @@ class AZMedienIE(InfoExtractor):
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
entry_id = mobj.group('kaltura_id')
|
||||
host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
if not entry_id:
|
||||
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
||||
payload = {
|
||||
'query': '''query VideoContext($articleId: ID!) {
|
||||
article: node(id: $articleId) {
|
||||
... on Article {
|
||||
mainAssetRelation {
|
||||
asset {
|
||||
... on VideoAsset {
|
||||
kalturaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||
}
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
data=json.dumps(payload).encode())
|
||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||
entry_id = self._download_json(
|
||||
self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
|
||||
'variables': json.dumps({
|
||||
'contextId': 'NewsArticle:' + article_id,
|
||||
}),
|
||||
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
@@ -5,10 +6,7 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
@@ -17,30 +15,32 @@ from ..utils import (
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
'info_dict': {
|
||||
'id': '1812978515',
|
||||
'ext': 'mp3',
|
||||
'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
|
||||
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭",
|
||||
'duration': 9.8485,
|
||||
'uploader': 'youtube-dl "\'/\\ä↭',
|
||||
'upload_date': '20121129',
|
||||
'timestamp': 1354224127,
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
@@ -79,11 +79,16 @@ class BandcampIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||
return self._parse_json(self._html_search_regex(
|
||||
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||
attr + ' data', group=2), video_id, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
title = mobj.group('title')
|
||||
title = self._match_id(url)
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
tralbum = self._extract_data_attr(webpage, title)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
track_id = None
|
||||
track = None
|
||||
@@ -91,10 +96,7 @@ class BandcampIE(InfoExtractor):
|
||||
duration = None
|
||||
|
||||
formats = []
|
||||
track_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
||||
webpage, 'track info', default='{}'), title)
|
||||
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
@@ -111,37 +113,25 @@ class BandcampIE(InfoExtractor):
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track_id = str_or_none(
|
||||
track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
def extract(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||
current = tralbum.get('current') or {}
|
||||
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
download_link = tralbum.get('freeDownloadPage')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'track id')
|
||||
track_id = compat_str(tralbum['id'])
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
|
||||
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
@@ -207,20 +197,20 @@ class BandcampIE(InfoExtractor):
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'album': embed.get('album_title'),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class BandcampAlbumIE(InfoExtractor):
|
||||
class BandcampAlbumIE(BandcampIE):
|
||||
IE_NAME = 'Bandcamp:album'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||
@@ -230,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1353101989',
|
||||
'ext': 'mp3',
|
||||
'title': 'Intro',
|
||||
'title': 'Blazo - Intro',
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -238,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '38097443',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
||||
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
],
|
||||
@@ -274,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'title': '"Entropy" EP',
|
||||
'uploader_id': 'jstrecords',
|
||||
'id': 'entropy-ep',
|
||||
'description': 'md5:0ff22959c943622972596062f2f366a5',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
@@ -283,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
'id': 'we-are-the-plague',
|
||||
'title': 'WE ARE THE PLAGUE',
|
||||
'uploader_id': 'insulters',
|
||||
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}]
|
||||
@@ -294,41 +292,34 @@ class BandcampAlbumIE(InfoExtractor):
|
||||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
uploader_id = mobj.group('subdomain')
|
||||
album_id = mobj.group('album_id')
|
||||
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
|
||||
playlist_id = album_id or uploader_id
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
track_elements = re.findall(
|
||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
||||
if not track_elements:
|
||||
tralbum = self._extract_data_attr(webpage, playlist_id)
|
||||
track_info = tralbum.get('trackinfo')
|
||||
if not track_info:
|
||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||
# Only tracks with duration info have songs
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(url, t_path),
|
||||
ie=BandcampIE.ie_key(),
|
||||
video_title=self._search_regex(
|
||||
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||
elem_content, 'track title', fatal=False))
|
||||
for elem_content, t_path in track_elements
|
||||
if self._html_search_meta('duration', elem_content, default=None)]
|
||||
urljoin(url, t['title_link']), BandcampIE.ie_key(),
|
||||
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
|
||||
for t in track_info
|
||||
if t.get('duration')]
|
||||
|
||||
current = tralbum.get('current') or {}
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
||||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = title.replace(r'\"', '"')
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'uploader_id': uploader_id,
|
||||
'id': playlist_id,
|
||||
'title': title,
|
||||
'title': current.get('title'),
|
||||
'description': current.get('about'),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class BandcampWeeklyIE(InfoExtractor):
|
||||
class BandcampWeeklyIE(BandcampIE):
|
||||
IE_NAME = 'Bandcamp:weekly'
|
||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
@@ -343,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
'release_date': '20170404',
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': 'Magic Moments',
|
||||
'episode_number': 208,
|
||||
'episode_id': '224',
|
||||
}
|
||||
},
|
||||
'params': {
|
||||
'format': 'opus-lo',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
blob = self._extract_data_attr(webpage, show_id, 'blob')
|
||||
|
||||
show = blob['bcw_show']
|
||||
|
||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
||||
# which happens to expose the latest Bandcamp Weekly episode.
|
||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
||||
show = blob['bcw_data'][show_id]
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
@@ -390,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
|
||||
episode_number = None
|
||||
seq = blob.get('bcw_seq')
|
||||
|
||||
if seq and isinstance(seq, list):
|
||||
try:
|
||||
episode_number = next(
|
||||
int_or_none(e.get('episode_number'))
|
||||
for e in seq
|
||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': show_id,
|
||||
'title': title,
|
||||
'description': show.get('desc') or show.get('short_desc'),
|
||||
'duration': float_or_none(show.get('audio_duration')),
|
||||
@@ -411,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
'release_date': unified_strdate(show.get('published_date')),
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': show.get('subtitle'),
|
||||
'episode_number': episode_number,
|
||||
'episode_id': compat_str(video_id),
|
||||
'episode_id': show_id,
|
||||
'formats': formats
|
||||
}
|
||||
|
@@ -528,7 +528,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def get_programme_id(item):
|
||||
def get_from_attributes(item):
|
||||
for p in('identifier', 'group'):
|
||||
for p in ('identifier', 'group'):
|
||||
value = item.get(p)
|
||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||
return value
|
||||
@@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE):
|
||||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
webpage, 'group id', default=None)
|
||||
if playlist_id:
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
@@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE):
|
||||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
'bbcthree config', default='{}'),
|
||||
playlist_id, transform_source=js_to_json, fatal=False)
|
||||
if bbc3_config:
|
||||
playlist_id, transform_source=js_to_json, fatal=False) or {}
|
||||
payload = bbc3_config.get('payload') or {}
|
||||
if payload:
|
||||
clip = payload.get('currentClip') or {}
|
||||
clip_vpid = clip.get('vpid')
|
||||
clip_title = clip.get('title')
|
||||
if clip_vpid and clip_title:
|
||||
formats, subtitles = self._download_media_selector(clip_vpid)
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': clip_vpid,
|
||||
'title': clip_title,
|
||||
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
|
||||
'description': clip.get('description'),
|
||||
'duration': parse_duration(clip.get('duration')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
bbc3_playlist = try_get(
|
||||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
||||
payload, lambda x: x['content']['bbcMedia']['playlist'],
|
||||
dict)
|
||||
if bbc3_playlist:
|
||||
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||
@@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE):
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
initial_data = self._parse_json(self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
|
||||
item_id = item.get('id')
|
||||
item_title = item.get('title')
|
||||
if not (item_id and item_title):
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': item_id,
|
||||
'title': item_title,
|
||||
'thumbnail': item.get('holdingImageUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||
if block.get('type') != 'media':
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
|
@@ -25,8 +25,8 @@ class BellMediaIE(InfoExtractor):
|
||||
etalk|
|
||||
marilyn
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
(?:much|cp24)\.com
|
||||
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||
@@ -62,6 +62,9 @@ class BellMediaIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cp24.com/video?clipId=1982548',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
|
@@ -24,7 +24,18 @@ from ..utils import (
|
||||
|
||||
|
||||
class BiliBiliIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:(?:www|bangumi)\.)?
|
||||
bilibili\.(?:tv|com)/
|
||||
(?:
|
||||
(?:
|
||||
video/[aA][vV]|
|
||||
anime/(?P<anime_id>\d+)/play\#
|
||||
)(?P<id_bv>\d+)|
|
||||
video/[bB][vV](?P<id>[^/?#&]+)
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||
@@ -92,6 +103,10 @@ class BiliBiliIE(InfoExtractor):
|
||||
'skip_download': True, # Test metadata only
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
# new BV video id format
|
||||
'url': 'https://www.bilibili.com/video/BV1JE411F741',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_APP_KEY = 'iVGUTjsxvpLeuDCf'
|
||||
@@ -109,7 +124,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = mobj.group('id') or mobj.group('id_bv')
|
||||
anime_id = mobj.group('anime_id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -419,3 +434,17 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
|
||||
entries, am_id, album_title, album_data.get('intro'))
|
||||
|
||||
return self.playlist_result(entries, am_id)
|
||||
|
||||
|
||||
class BiliBiliPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://www.bilibili.tv/video/av%s/' % video_id,
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||
|
@@ -3,10 +3,11 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vk import VKIE
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class BIQLEIE(InfoExtractor):
|
||||
@@ -47,9 +48,16 @@ class BIQLEIE(InfoExtractor):
|
||||
if VKIE.suitable(embed_url):
|
||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||
|
||||
self._request_webpage(
|
||||
HEADRequest(embed_url), video_id, headers={'Referer': url})
|
||||
video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
|
||||
embed_page = self._download_webpage(
|
||||
embed_url, video_id, headers={'Referer': url})
|
||||
video_ext = self._get_cookies(embed_url).get('video_ext')
|
||||
if video_ext:
|
||||
video_ext = compat_urllib_parse_unquote(video_ext.value)
|
||||
if not video_ext:
|
||||
video_ext = compat_b64decode(self._search_regex(
|
||||
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
|
||||
embed_page, 'video_ext')).decode()
|
||||
video_id, sig, _, access_token = video_ext.split(':')
|
||||
item = self._download_json(
|
||||
'https://api.vk.com/method/video.get', video_id,
|
||||
headers={'User-Agent': 'okhttp/3.4.1'}, query={
|
||||
|
98
youtube_dl/extractor/box.py
Normal file
98
youtube_dl/extractor/box.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
# try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
'info_dict': {
|
||||
'id': '510727257538',
|
||||
'ext': 'mp4',
|
||||
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
|
||||
'uploader': 'MLS Video',
|
||||
'timestamp': 1566320259,
|
||||
'upload_date': '20190820',
|
||||
'uploader_id': '235196876',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
shared_name, file_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, file_id)
|
||||
request_token = self._parse_json(self._search_regex(
|
||||
r'Box\.config\s*=\s*({.+?});', webpage,
|
||||
'Box config'), file_id)['requestToken']
|
||||
access_token = self._download_json(
|
||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
'Downloading token JSON metadata',
|
||||
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'X-Request-Token': request_token,
|
||||
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||
})[file_id]['read']
|
||||
shared_link = 'https://app.box.com/s/' + shared_name
|
||||
f = self._download_json(
|
||||
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||
'Downloading file JSON metadata', headers={
|
||||
'Authorization': 'Bearer ' + access_token,
|
||||
'BoxApi': 'shared_link=' + shared_link,
|
||||
'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
|
||||
}, query={
|
||||
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
|
||||
})
|
||||
title = f['name']
|
||||
|
||||
query = {
|
||||
'access_token': access_token,
|
||||
'shared_link': shared_link
|
||||
}
|
||||
|
||||
formats = []
|
||||
|
||||
# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
|
||||
# entry_url_template = try_get(
|
||||
# entry, lambda x: x['content']['url_template'])
|
||||
# if not entry_url_template:
|
||||
# continue
|
||||
# representation = entry.get('representation')
|
||||
# if representation == 'dash':
|
||||
# TODO: append query to every fragment URL
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
|
||||
# file_id, query=query))
|
||||
|
||||
authenticated_download_url = f.get('authenticated_download_url')
|
||||
if authenticated_download_url and f.get('is_download_available'):
|
||||
formats.append({
|
||||
'ext': f.get('extension') or determine_ext(title),
|
||||
'filesize': f.get('size'),
|
||||
'format_id': 'download',
|
||||
'url': update_url_query(authenticated_download_url, query),
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
creator = f.get('created_by') or {}
|
||||
|
||||
return {
|
||||
'id': file_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'description': f.get('description') or None,
|
||||
'uploader': creator.get('name'),
|
||||
'timestamp': parse_iso8601(f.get('created_at')),
|
||||
'uploader_id': creator.get('id'),
|
||||
}
|
@@ -5,32 +5,34 @@ import base64
|
||||
import re
|
||||
import struct
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
UnsupportedError,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -145,7 +147,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
def _build_brightcove_url(cls, object_str):
|
||||
"""
|
||||
Build a Brightcove url from a xml string containing
|
||||
<object class="BrightcoveExperience">{params}</object>
|
||||
@@ -215,7 +217,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
return cls._make_brightcove_url(params)
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url_from_js(cls, object_js):
|
||||
def _build_brightcove_url_from_js(cls, object_js):
|
||||
# The layout of JS is as follows:
|
||||
# customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
|
||||
# // build Brightcove <object /> XML
|
||||
@@ -270,12 +272,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
).+?>\s*</object>''',
|
||||
webpage)
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
|
||||
|
||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||
if matches:
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
cls._build_brightcove_url_from_js(custom_bc)
|
||||
for custom_bc in matches]))
|
||||
return [src for _, src in re.findall(
|
||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||
@@ -424,7 +426,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
# [2] looks like:
|
||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||
r'''(?isx)
|
||||
(<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||
(<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||
(?:.*?
|
||||
(<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
@@ -553,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
subtitles = {}
|
||||
for text_track in json_data.get('text_tracks', []):
|
||||
if text_track.get('src'):
|
||||
subtitles.setdefault(text_track.get('srclang'), []).append({
|
||||
'url': text_track['src'],
|
||||
})
|
||||
if text_track.get('kind') != 'captions':
|
||||
continue
|
||||
text_track_url = url_or_none(text_track.get('src'))
|
||||
if not text_track_url:
|
||||
continue
|
||||
lang = (str_or_none(text_track.get('srclang'))
|
||||
or str_or_none(text_track.get('label')) or 'en').lower()
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': text_track_url,
|
||||
})
|
||||
|
||||
is_live = False
|
||||
duration = float_or_none(json_data.get('duration'), 1000)
|
||||
|
@@ -9,21 +9,26 @@ class BusinessInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
|
||||
'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
|
||||
'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
|
||||
'info_dict': {
|
||||
'id': 'hZRllCfw',
|
||||
'id': 'cjGDb0X9',
|
||||
'ext': 'mp4',
|
||||
'title': "Here's how much radiation you're exposed to in everyday life",
|
||||
'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
|
||||
'upload_date': '20170709',
|
||||
'timestamp': 1499606400,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
|
||||
'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
|
||||
'upload_date': '20160611',
|
||||
'timestamp': 1465675620,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
|
||||
'only_matching': True,
|
||||
'md5': '43f438dbc6da0b89f5ac42f68529d84a',
|
||||
'info_dict': {
|
||||
'id': '5zJwd4FK',
|
||||
'ext': 'mp4',
|
||||
'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
|
||||
'description': 'md5:2af8975825d38a4fed24717bbe51db49',
|
||||
'upload_date': '20170705',
|
||||
'timestamp': 1499270528,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
|
||||
'only_matching': True,
|
||||
@@ -35,7 +40,8 @@ class BusinessInsiderIE(InfoExtractor):
|
||||
jwplatform_id = self._search_regex(
|
||||
(r'data-media-id=["\']([a-zA-Z0-9]{8})',
|
||||
r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
|
||||
r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
|
||||
r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
|
||||
r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
|
||||
webpage, 'jwplatform id')
|
||||
return self.url_result(
|
||||
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
|
||||
|
@@ -13,6 +13,8 @@ from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -20,15 +22,15 @@ class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||
'info_dict': {
|
||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nachtwacht: De Greystook',
|
||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
||||
'description': 'Nachtwacht: De Greystook',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1468.03,
|
||||
'duration': 1468.04,
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||
}, {
|
||||
@@ -39,23 +41,45 @@ class CanvasIE(InfoExtractor):
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8',
|
||||
}
|
||||
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||
|
||||
# Old API endpoint, serves more formats but may fail for some videos
|
||||
data = self._download_json(
|
||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||
% (site_id, video_id), video_id)
|
||||
% (site_id, video_id), video_id, 'Downloading asset JSON',
|
||||
'Unable to download asset JSON', fatal=False)
|
||||
|
||||
# New API endpoint
|
||||
if not data:
|
||||
token = self._download_json(
|
||||
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||
'Downloading token', data=b'',
|
||||
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
|
||||
data = self._download_json(
|
||||
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||
video_id, 'Downloading video JSON', fatal=False, query={
|
||||
'vrtPlayerToken': token,
|
||||
'client': '%s@PROD' % site_id,
|
||||
}, expected_status=400)
|
||||
message = data.get('message')
|
||||
if message and not data.get('title'):
|
||||
if data.get('code') == 'AUTHENTICATION_REQUIRED':
|
||||
self.raise_login_required(message)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
title = data['title']
|
||||
description = data.get('description')
|
||||
|
||||
formats = []
|
||||
for target in data['targetUrls']:
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
format_type = format_type.upper()
|
||||
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
||||
@@ -134,20 +158,20 @@ class CanvasEenIE(InfoExtractor):
|
||||
},
|
||||
'skip': 'Pagina niet gevonden',
|
||||
}, {
|
||||
'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
|
||||
'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
|
||||
'info_dict': {
|
||||
'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
|
||||
'display_id': 'herbekijk-sorry-voor-alles',
|
||||
'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
|
||||
'display_id': 'emma-pakt-thilly-aan',
|
||||
'ext': 'mp4',
|
||||
'title': 'Herbekijk Sorry voor alles',
|
||||
'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
|
||||
'title': 'Emma pakt Thilly aan',
|
||||
'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 3788.06,
|
||||
'duration': 118.24,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Episode no longer available',
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
|
||||
'only_matching': True,
|
||||
@@ -183,19 +207,44 @@ class VrtNUIE(GigyaBaseIE):
|
||||
IE_DESC = 'VrtNU.be'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
# Available via old API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'De zwarte weduwe',
|
||||
'description': 'md5:d90c21dced7db869a85db89a623998d4',
|
||||
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
|
||||
'duration': 1457.04,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'season': '1',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['is not a supported codec'],
|
||||
}, {
|
||||
# Only available via new API endpoint
|
||||
'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
|
||||
'info_dict': {
|
||||
'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aflevering 5',
|
||||
'description': 'Wie valt door de mand tijdens een missie?',
|
||||
'duration': 2967.06,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode_number': 5,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
'params': {
|
||||
'username': '<snip>',
|
||||
'password': '<snip>',
|
||||
},
|
||||
'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
|
||||
}]
|
||||
_NETRC_MACHINE = 'vrtnu'
|
||||
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
|
||||
|
@@ -1,8 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import re
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -216,6 +218,29 @@ class CBCWatchBaseIE(InfoExtractor):
|
||||
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
|
||||
}
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
|
||||
_TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
|
||||
_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
|
||||
_NETRC_MACHINE = 'cbcwatch'
|
||||
|
||||
def _signature(self, email, password):
|
||||
data = json.dumps({
|
||||
'email': email,
|
||||
'password': password,
|
||||
}).encode()
|
||||
headers = {'content-type': 'application/json'}
|
||||
query = {'apikey': self._API_KEY}
|
||||
resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
|
||||
access_token = resp['access_token']
|
||||
|
||||
# token
|
||||
query = {
|
||||
'access_token': access_token,
|
||||
'apikey': self._API_KEY,
|
||||
'jwtapp': 'jwt',
|
||||
}
|
||||
resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
|
||||
return resp['signature']
|
||||
|
||||
def _call_api(self, path, video_id):
|
||||
url = path if path.startswith('http') else self._API_BASE_URL + path
|
||||
@@ -239,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
if self._valid_device_token():
|
||||
return
|
||||
device = self._downloader.cache.load('cbcwatch', 'device') or {}
|
||||
device = self._downloader.cache.load(
|
||||
'cbcwatch', self._cache_device_key()) or {}
|
||||
self._device_id, self._device_token = device.get('id'), device.get('token')
|
||||
if self._valid_device_token():
|
||||
return
|
||||
@@ -248,16 +274,30 @@ class CBCWatchBaseIE(InfoExtractor):
|
||||
def _valid_device_token(self):
|
||||
return self._device_id and self._device_token
|
||||
|
||||
def _cache_device_key(self):
|
||||
email, _ = self._get_login_info()
|
||||
return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
|
||||
|
||||
def _register_device(self):
|
||||
self._device_id = self._device_token = None
|
||||
result = self._download_xml(
|
||||
self._API_BASE_URL + 'device/register',
|
||||
None, 'Acquiring device token',
|
||||
data=b'<device><type>web</type></device>')
|
||||
self._device_id = xpath_text(result, 'deviceId', fatal=True)
|
||||
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
||||
email, password = self._get_login_info()
|
||||
if email and password:
|
||||
signature = self._signature(email, password)
|
||||
data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
|
||||
escape(signature), escape(self._device_id)).encode()
|
||||
url = self._API_BASE_URL + 'device/login'
|
||||
result = self._download_xml(
|
||||
url, None, data=data,
|
||||
headers={'content-type': 'application/xml'})
|
||||
self._device_token = xpath_text(result, 'token', fatal=True)
|
||||
else:
|
||||
self._device_token = xpath_text(result, 'deviceToken', fatal=True)
|
||||
self._downloader.cache.store(
|
||||
'cbcwatch', 'device', {
|
||||
'cbcwatch', self._cache_device_key(), {
|
||||
'id': self._device_id,
|
||||
'token': self._device_token,
|
||||
})
|
||||
|
@@ -5,10 +5,16 @@ import codecs
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
@@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
|
||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
webpage, 'rating', fatal=False, group='rating_value')
|
||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||
group='rating_value')
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
@@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
|
||||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
# Source: https://www.cda.pl/js/player.js?t=1606154898
|
||||
def decrypt_file(a):
|
||||
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
|
||||
a = a.replace(p, '')
|
||||
a = compat_urllib_parse_unquote(a)
|
||||
b = []
|
||||
for c in a:
|
||||
f = compat_ord(c)
|
||||
b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
|
||||
a = ''.join(b)
|
||||
a = a.replace('.cda.mp4', '')
|
||||
for p in ('.2cda.pl', '.3cda.pl'):
|
||||
a = a.replace(p, '.cda.pl')
|
||||
if '/upstream' in a:
|
||||
a = a.replace('/upstream', '.mp4/upstream')
|
||||
return 'https://' + a
|
||||
return 'https://' + a + '.mp4'
|
||||
|
||||
def extract_format(page, version):
|
||||
json_str = self._html_search_regex(
|
||||
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
|
||||
@@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
|
||||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||
if video['file'].endswith('adc.mp4'):
|
||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||
elif not video['file'].startswith('http'):
|
||||
video['file'] = decrypt_file(video['file'])
|
||||
f = {
|
||||
'url': video['file'],
|
||||
}
|
||||
@@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return info_dict
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(info_dict, info)
|
||||
|
@@ -1,20 +1,24 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CloudflareStreamIE(InfoExtractor):
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
|
||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/|
|
||||
embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=
|
||||
(?:watch\.)?%s/|
|
||||
%s
|
||||
)
|
||||
(?P<id>[\da-f]+)
|
||||
'''
|
||||
(?P<id>%s)
|
||||
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
|
||||
'info_dict': {
|
||||
@@ -41,23 +45,28 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
|
||||
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
|
||||
base_url = 'https://%s/%s/' % (domain, video_id)
|
||||
if '.' in video_id:
|
||||
video_id = self._parse_json(base64.urlsafe_b64decode(
|
||||
video_id.split('.')[1]), video_id)['sub']
|
||||
manifest_base_url = base_url + 'manifest/video.'
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id,
|
||||
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False)
|
||||
manifest_base_url + 'm3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id,
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
@@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
|
||||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||
_TEST = {
|
||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||
'info_dict': {
|
||||
@@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
||||
'video id')
|
||||
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
video_id = self._download_json(
|
||||
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||
'query': '''{
|
||||
page(path: "%s") {
|
||||
vcpsId
|
||||
}
|
||||
}''' % path,
|
||||
})['data']['page']['vcpsId']
|
||||
return self.url_result(
|
||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
||||
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||
CNBCIE.ie_key())
|
||||
|
@@ -10,12 +10,13 @@ import os
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import ssl
|
||||
import sys
|
||||
import time
|
||||
import math
|
||||
|
||||
from ..compat import (
|
||||
compat_cookiejar,
|
||||
compat_cookiejar_Cookie,
|
||||
compat_cookies,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
@@ -67,6 +68,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
sanitize_filename,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -623,9 +625,12 @@ class InfoExtractor(object):
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
if data is not None or headers:
|
||||
url_or_request = sanitized_Request(url_or_request, data, headers)
|
||||
exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
|
||||
if hasattr(ssl, 'CertificateError'):
|
||||
exceptions.append(ssl.CertificateError)
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
except tuple(exceptions) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
@@ -1182,16 +1187,33 @@ class InfoExtractor(object):
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
json_ld_list = list(re.finditer(JSON_LD_RE, html))
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||
# At the same time `default` may be passed that assumes `fatal=False`
|
||||
# for _search_regex. Let's simulate the same behavior here as well.
|
||||
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||
json_ld = []
|
||||
for mobj in json_ld_list:
|
||||
json_ld_item = self._parse_json(
|
||||
mobj.group('json_ld'), video_id, fatal=fatal)
|
||||
if not json_ld_item:
|
||||
continue
|
||||
if isinstance(json_ld_item, dict):
|
||||
json_ld.append(json_ld_item)
|
||||
elif isinstance(json_ld_item, (list, tuple)):
|
||||
json_ld.extend(json_ld_item)
|
||||
if json_ld:
|
||||
json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||
if json_ld:
|
||||
return json_ld
|
||||
if default is not NO_DEFAULT:
|
||||
return default
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract JSON-LD')
|
||||
else:
|
||||
self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
|
||||
return {}
|
||||
|
||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
if isinstance(json_ld, compat_str):
|
||||
@@ -1227,7 +1249,10 @@ class InfoExtractor(object):
|
||||
interaction_type = is_e.get('interactionType')
|
||||
if not isinstance(interaction_type, compat_str):
|
||||
continue
|
||||
interaction_count = int_or_none(is_e.get('userInteractionCount'))
|
||||
# For interaction count some sites provide string instead of
|
||||
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||
# so extracting count with more relaxed str_to_int
|
||||
interaction_count = str_to_int(is_e.get('userInteractionCount'))
|
||||
if interaction_count is None:
|
||||
continue
|
||||
count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
|
||||
@@ -1247,6 +1272,7 @@ class InfoExtractor(object):
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'uploader': str_or_none(e.get('author')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
@@ -1256,10 +1282,10 @@ class InfoExtractor(object):
|
||||
extract_interaction_statistic(e)
|
||||
|
||||
for e in json_ld:
|
||||
if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
|
||||
if '@context' in e:
|
||||
item_type = e.get('@type')
|
||||
if expected_type is not None and expected_type != item_type:
|
||||
return info
|
||||
continue
|
||||
if item_type in ('TVEpisode', 'Episode'):
|
||||
episode_name = unescapeHTML(e.get('name'))
|
||||
info.update({
|
||||
@@ -1293,11 +1319,17 @@ class InfoExtractor(object):
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
extract_video_object(e)
|
||||
continue
|
||||
if expected_type is None:
|
||||
continue
|
||||
else:
|
||||
break
|
||||
video = e.get('video')
|
||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||
extract_video_object(video)
|
||||
break
|
||||
if expected_type is None:
|
||||
continue
|
||||
else:
|
||||
break
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@staticmethod
|
||||
@@ -1424,9 +1456,10 @@ class InfoExtractor(object):
|
||||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError:
|
||||
except ExtractorError as e:
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||
'%s: %s URL is invalid, skipping: %s'
|
||||
% (video_id, item, error_to_compat_str(e.cause)))
|
||||
return False
|
||||
|
||||
def http_scheme(self):
|
||||
@@ -1631,7 +1664,7 @@ class InfoExtractor(object):
|
||||
# just the media without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||
# master playlist tags MUST NOT appear in a media playist and vice versa.
|
||||
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
@@ -2340,6 +2373,8 @@ class InfoExtractor(object):
|
||||
if res is False:
|
||||
return []
|
||||
ism_doc, urlh = res
|
||||
if ism_doc is None:
|
||||
return []
|
||||
|
||||
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
|
||||
|
||||
@@ -2561,6 +2596,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||
formats = []
|
||||
|
||||
hdcore_sign = 'hdcore=3.7.0'
|
||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||
hds_host = hosts.get('hds')
|
||||
@@ -2573,6 +2609,7 @@ class InfoExtractor(object):
|
||||
for entry in f4m_formats:
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.extend(f4m_formats)
|
||||
|
||||
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||
hls_host = hosts.get('hls')
|
||||
if hls_host:
|
||||
@@ -2580,6 +2617,31 @@ class InfoExtractor(object):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
http_host = hosts.get('http')
|
||||
if http_host and 'hdnea=' not in manifest_url:
|
||||
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+'
|
||||
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||
qualities_length = len(qualities)
|
||||
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1):
|
||||
i = 0
|
||||
http_formats = []
|
||||
for f in formats:
|
||||
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
|
||||
for protocol in ('http', 'https'):
|
||||
http_f = f.copy()
|
||||
del http_f['manifest_url']
|
||||
http_url = re.sub(
|
||||
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url'])
|
||||
http_f.update({
|
||||
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||
'url': http_url,
|
||||
'protocol': protocol,
|
||||
})
|
||||
http_formats.append(http_f)
|
||||
i += 1
|
||||
formats.extend(http_formats)
|
||||
|
||||
return formats
|
||||
|
||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||
@@ -2818,7 +2880,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
|
||||
path='/', secure=False, discard=False, rest={}, **kwargs):
|
||||
cookie = compat_cookiejar.Cookie(
|
||||
cookie = compat_cookiejar_Cookie(
|
||||
0, name, value, port, port is not None, domain, True,
|
||||
domain.startswith('.'), path, True, secure, expire_time,
|
||||
discard, None, None, rest)
|
||||
|
@@ -16,6 +16,8 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
@@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
|
||||
'uploader': 'gq',
|
||||
'upload_date': '20170321',
|
||||
'timestamp': 1490126427,
|
||||
'description': 'How much grimmer would things be if these people were competent?',
|
||||
},
|
||||
}, {
|
||||
# JS embed
|
||||
@@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
|
||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||
'uploader': 'arstechnica',
|
||||
'upload_date': '20150916',
|
||||
'timestamp': 1442434955,
|
||||
'timestamp': 1442434920,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||
@@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for t, caption in video_info.get('captions', {}).items():
|
||||
caption_url = caption.get('src')
|
||||
if not (t in ('vtt', 'srt', 'tml') and caption_url):
|
||||
continue
|
||||
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
|
||||
'season': video_info.get('season_title'),
|
||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||
'categories': video_info.get('categories'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
|
||||
if url_type == 'series':
|
||||
return self._extract_series(url, webpage)
|
||||
else:
|
||||
params = self._extract_video_params(webpage, display_id)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
video = try_get(self._parse_json(self._search_regex(
|
||||
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', '{}'), display_id),
|
||||
lambda x: x['transformed']['video'])
|
||||
if video:
|
||||
params = {'videoId': video['id']}
|
||||
info = {'description': strip_or_none(video.get('description'))}
|
||||
else:
|
||||
params = self._extract_video_params(webpage, display_id)
|
||||
info = self._search_json_ld(
|
||||
webpage, display_id, fatal=False)
|
||||
info.update(self._extract_video(params))
|
||||
return info
|
||||
|
@@ -13,6 +13,7 @@ from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_Element,
|
||||
compat_etree_fromstring,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urllib_request,
|
||||
compat_urlparse,
|
||||
@@ -25,9 +26,9 @@ from ..utils import (
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
merge_dicts,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -136,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
# rtmp
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
|
||||
'info_dict': {
|
||||
@@ -157,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
'info_dict': {
|
||||
'id': '702409',
|
||||
'ext': 'mp4',
|
||||
'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
|
||||
'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
|
||||
'title': compat_str,
|
||||
'description': compat_str,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'TV TOKYO',
|
||||
'upload_date': '20160508',
|
||||
'uploader': 'Re:Zero Partners',
|
||||
'timestamp': 1462098900,
|
||||
'upload_date': '20160501',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -172,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
'info_dict': {
|
||||
'id': '727589',
|
||||
'ext': 'mp4',
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
|
||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
||||
'title': compat_str,
|
||||
'description': compat_str,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Kadokawa Pictures Inc.',
|
||||
'upload_date': '20170118',
|
||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||
'timestamp': 1484130900,
|
||||
'upload_date': '20170111',
|
||||
'series': compat_str,
|
||||
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||
'season_number': 2,
|
||||
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
||||
@@ -200,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
'info_dict': {
|
||||
'id': '535080',
|
||||
'ext': 'mp4',
|
||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||
'title': compat_str,
|
||||
'description': compat_str,
|
||||
'uploader': 'Marvelous AQL Inc.',
|
||||
'upload_date': '20091021',
|
||||
'timestamp': 1255512600,
|
||||
'upload_date': '20091014',
|
||||
},
|
||||
'params': {
|
||||
# Just test metadata extraction
|
||||
@@ -224,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
# just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video gone',
|
||||
}, {
|
||||
# A video with a vastly different season name compared to the series name
|
||||
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
|
||||
'info_dict': {
|
||||
'id': '590532',
|
||||
'ext': 'mp4',
|
||||
'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
|
||||
'description': 'Mahiro and Nyaruko talk about official certification.',
|
||||
'title': compat_str,
|
||||
'description': compat_str,
|
||||
'uploader': 'TV TOKYO',
|
||||
'timestamp': 1330956000,
|
||||
'upload_date': '20120305',
|
||||
'series': 'Nyarko-san: Another Crawling Chaos',
|
||||
'season': 'Haiyoru! Nyaruani (ONA)',
|
||||
@@ -442,23 +449,21 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
webpage, 'language', default=None, group='lang')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
(r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
|
||||
webpage, 'video_title', default=None)
|
||||
if not video_title:
|
||||
video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
[r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
|
||||
webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
|
||||
if video_upload_date:
|
||||
video_upload_date = unified_strdate(video_upload_date)
|
||||
video_uploader = self._html_search_regex(
|
||||
# try looking for both an uploader that's a link and one that's not
|
||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||
webpage, 'video_uploader', fatal=False)
|
||||
webpage, 'video_uploader', default=False)
|
||||
|
||||
formats = []
|
||||
for stream in media.get('streams', []):
|
||||
@@ -611,14 +616,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
webpage, 'season number', default=None))
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': series,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
@@ -626,7 +632,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'episode_number': episode_number,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
}
|
||||
}, info)
|
||||
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
|
@@ -32,7 +32,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _get_cookie_value(cookies, name):
|
||||
cookie = cookies.get('name')
|
||||
cookie = cookies.get(name)
|
||||
if cookie:
|
||||
return cookie.value
|
||||
|
||||
|
@@ -16,10 +16,11 @@ class DctpTvIE(InfoExtractor):
|
||||
_TESTS = [{
|
||||
# 4x3
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'md5': '3ffbd1556c3fe210724d7088fad723e3',
|
||||
'info_dict': {
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
|
||||
'ext': 'flv',
|
||||
'ext': 'm4v',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
@@ -27,10 +28,6 @@ class DctpTvIE(InfoExtractor):
|
||||
'timestamp': 1302172322,
|
||||
'upload_date': '20110407',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# 16x9
|
||||
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||
@@ -59,33 +56,26 @@ class DctpTvIE(InfoExtractor):
|
||||
|
||||
uuid = media['uuid']
|
||||
title = media['title']
|
||||
ratio = '16x9' if media.get('is_wide') else '4x3'
|
||||
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
||||
is_wide = media.get('is_wide')
|
||||
formats = []
|
||||
|
||||
servers = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
||||
note='Downloading server list JSON', fatal=False)
|
||||
def add_formats(suffix):
|
||||
templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
|
||||
formats.extend([{
|
||||
'format_id': 'hls-' + suffix,
|
||||
'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
|
||||
'protocol': 'm3u8_native',
|
||||
}, {
|
||||
'format_id': 's3-' + suffix,
|
||||
'url': templ % 'completed-media.s3.amazonaws.com',
|
||||
}, {
|
||||
'format_id': 'http-' + suffix,
|
||||
'url': templ % 'cdn-media.dctp.tv',
|
||||
}])
|
||||
|
||||
if servers:
|
||||
endpoint = next(
|
||||
server['endpoint']
|
||||
for server in servers
|
||||
if url_or_none(server.get('endpoint'))
|
||||
and 'cloudfront' in server['endpoint'])
|
||||
else:
|
||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
||||
|
||||
app = self._search_regex(
|
||||
r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
|
||||
|
||||
formats = [{
|
||||
'url': endpoint,
|
||||
'app': app,
|
||||
'play_path': play_path,
|
||||
'page_url': url,
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
||||
'ext': 'flv',
|
||||
}]
|
||||
add_formats('0500_' + ('16x9' if is_wide else '4x3'))
|
||||
if is_wide:
|
||||
add_formats('720p')
|
||||
|
||||
thumbnails = []
|
||||
images = media.get('images')
|
||||
|
@@ -13,8 +13,8 @@ from ..compat import compat_HTTPError
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:(?:www|go)\.)?discovery|
|
||||
(?:www\.)?
|
||||
go\.discovery|
|
||||
www\.
|
||||
(?:
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
@@ -22,8 +22,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
ahctv|
|
||||
destinationamerica|
|
||||
sciencechannel|
|
||||
tlc|
|
||||
velocity
|
||||
tlc
|
||||
)|
|
||||
watch\.
|
||||
(?:
|
||||
@@ -83,7 +82,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
'authRel': 'authorization',
|
||||
'client_id': '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
'redirectUri': 'https://www.discovery.com/',
|
||||
})['access_token']
|
||||
|
||||
headers = self.geo_verification_headers()
|
||||
|
@@ -7,7 +7,7 @@ from .dplay import DPlayIE
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
@@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
||||
}, {
|
||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
@@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
||||
video_id = self._match_id(urlh.geturl())
|
||||
|
||||
hash = self._search_regex(
|
||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||
|
@@ -60,7 +60,7 @@ class EuropaIE(InfoExtractor):
|
||||
|
||||
title = get_item('title', preferred_langs) or video_id
|
||||
description = get_item('description', preferred_langs)
|
||||
thumbnmail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||
thumbnail = xpath_text(playlist, './info/thumburl', 'thumbnail')
|
||||
upload_date = unified_strdate(xpath_text(playlist, './info/date', 'upload date'))
|
||||
duration = parse_duration(xpath_text(playlist, './info/duration', 'duration'))
|
||||
view_count = int_or_none(xpath_text(playlist, './info/views', 'views'))
|
||||
@@ -85,7 +85,7 @@ class EuropaIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnmail,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
|
@@ -15,7 +15,7 @@ from ..utils import (
|
||||
class ExpressenIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?expressen\.se/
|
||||
(?:www\.)?(?:expressen|di)\.se/
|
||||
(?:(?:tvspelare/video|videoplayer/embed)/)?
|
||||
tv/(?:[^/]+/)*
|
||||
(?P<id>[^/?#&]+)
|
||||
@@ -42,13 +42,16 @@ class ExpressenIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url') for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -36,6 +36,7 @@ from .afreecatv import AfreecaTVIE
|
||||
from .airmozilla import AirMozillaIE
|
||||
from .aljazeera import AlJazeeraIE
|
||||
from .alphaporno import AlphaPornoIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .americastestkitchen import AmericasTestKitchenIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
@@ -58,7 +59,7 @@ from .ard import (
|
||||
ARDMediathekIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVPlus7IE,
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
@@ -105,6 +106,7 @@ from .bilibili import (
|
||||
BiliBiliBangumiIE,
|
||||
BilibiliAudioIE,
|
||||
BilibiliAudioAlbumIE,
|
||||
BiliBiliPlayerIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
@@ -120,6 +122,7 @@ from .blinkx import BlinkxIE
|
||||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .bpb import BpbIE
|
||||
from .br import (
|
||||
BRIE,
|
||||
@@ -497,7 +500,6 @@ from .jeuxvideo import JeuxVideoIE
|
||||
from .jove import JoveIE
|
||||
from .joj import JojIE
|
||||
from .jwplatform import JWPlatformIE
|
||||
from .jpopsukitv import JpopsukiIE
|
||||
from .kakao import KakaoIE
|
||||
from .kaltura import KalturaIE
|
||||
from .kanalplay import KanalPlayIE
|
||||
@@ -529,6 +531,7 @@ from .laola1tv import (
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lbry import LBRYIE
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
LcpPlayIE,
|
||||
@@ -603,6 +606,7 @@ from .markiza import (
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
from .mediaset import MediasetIE
|
||||
from .mediasite import (
|
||||
MediasiteIE,
|
||||
@@ -636,7 +640,10 @@ from .mixcloud import (
|
||||
from .mlb import MLBIE
|
||||
from .mnet import MnetIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mofosex import (
|
||||
MofosexIE,
|
||||
MofosexEmbedIE,
|
||||
)
|
||||
from .mojvideo import MojvideoIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import (
|
||||
@@ -777,6 +784,7 @@ from .ntvru import NTVRuIE
|
||||
from .nytimes import (
|
||||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
NYTimesCookingIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzz import NZZIE
|
||||
@@ -801,6 +809,16 @@ from .orf import (
|
||||
ORFFM4IE,
|
||||
ORFFM4StoryIE,
|
||||
ORFOE1IE,
|
||||
ORFOE3IE,
|
||||
ORFNOEIE,
|
||||
ORFWIEIE,
|
||||
ORFBGLIE,
|
||||
ORFOOEIE,
|
||||
ORFSTMIE,
|
||||
ORFKTNIE,
|
||||
ORFSBGIE,
|
||||
ORFTIRIE,
|
||||
ORFVBGIE,
|
||||
ORFIPTVIE,
|
||||
)
|
||||
from .outsidetv import OutsideTVIE
|
||||
@@ -808,7 +826,6 @@ from .packtpub import (
|
||||
PacktPubIE,
|
||||
PacktPubCourseIE,
|
||||
)
|
||||
from .pandatv import PandaTVIE
|
||||
from .pandoratv import PandoraTVIE
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
@@ -830,6 +847,10 @@ from .picarto import (
|
||||
)
|
||||
from .piksel import PikselIE
|
||||
from .pinkbike import PinkbikeIE
|
||||
from .pinterest import (
|
||||
PinterestIE,
|
||||
PinterestCollectionIE,
|
||||
)
|
||||
from .pladform import PladformIE
|
||||
from .platzi import (
|
||||
PlatziIE,
|
||||
@@ -851,6 +872,7 @@ from .polskieradio import (
|
||||
PolskieRadioIE,
|
||||
PolskieRadioCategoryIE,
|
||||
)
|
||||
from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
@@ -905,7 +927,9 @@ from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
RedBullEmbedIE,
|
||||
RedBullTVRrnContentIE,
|
||||
RedBullIE,
|
||||
)
|
||||
from .reddit import (
|
||||
RedditIE,
|
||||
@@ -942,6 +966,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rumble import RumbleEmbedIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
RutubeChannelIE,
|
||||
@@ -992,6 +1017,16 @@ from .shared import (
|
||||
from .showroomlive import ShowRoomLiveIE
|
||||
from .sina import SinaIE
|
||||
from .sixplay import SixPlayIE
|
||||
from .skyit import (
|
||||
SkyItPlayerIE,
|
||||
SkyItVideoIE,
|
||||
SkyItVideoLiveIE,
|
||||
SkyItIE,
|
||||
SkyItAcademyIE,
|
||||
SkyItArteIE,
|
||||
CieloTVItIE,
|
||||
TV8ItIE,
|
||||
)
|
||||
from .skylinewebcams import SkylineWebcamsIE
|
||||
from .skynewsarabia import (
|
||||
SkyNewsArabiaIE,
|
||||
@@ -1038,8 +1073,7 @@ from .spankbang import (
|
||||
SpankBangPlaylistIE,
|
||||
)
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spiegel import SpiegelIE
|
||||
from .spike import (
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
@@ -1216,14 +1250,11 @@ from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import TwitCastingIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
TwitchVodIE,
|
||||
TwitchProfileIE,
|
||||
TwitchAllVideosIE,
|
||||
TwitchUploadsIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchHighlightsIE,
|
||||
TwitchCollectionIE,
|
||||
TwitchVideosIE,
|
||||
TwitchVideosClipsIE,
|
||||
TwitchVideosCollectionsIE,
|
||||
TwitchStreamIE,
|
||||
TwitchClipsIE,
|
||||
)
|
||||
@@ -1344,8 +1375,8 @@ from .vk import (
|
||||
)
|
||||
from .vlive import (
|
||||
VLiveIE,
|
||||
VLivePostIE,
|
||||
VLiveChannelIE,
|
||||
VLivePlaylistIE
|
||||
)
|
||||
from .vodlocker import VodlockerIE
|
||||
from .vodpl import VODPlIE
|
||||
@@ -1462,21 +1493,18 @@ from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
YoutubeChannelIE,
|
||||
YoutubeFavouritesIE,
|
||||
YoutubeHistoryIE,
|
||||
YoutubeLiveIE,
|
||||
YoutubeTabIE,
|
||||
YoutubePlaylistIE,
|
||||
YoutubePlaylistsIE,
|
||||
YoutubeRecommendedIE,
|
||||
YoutubeSearchDateIE,
|
||||
YoutubeSearchIE,
|
||||
YoutubeSearchURLIE,
|
||||
YoutubeShowIE,
|
||||
#YoutubeSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeUserIE,
|
||||
YoutubeYtUserIE,
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
|
@@ -466,15 +466,18 @@ class FacebookIE(InfoExtractor):
|
||||
return info_dict
|
||||
|
||||
if '/posts/' in url:
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||
for vid in self._parse_json(
|
||||
self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
|
||||
webpage, 'video ids', group='ids'),
|
||||
video_id)]
|
||||
video_id_json = self._search_regex(
|
||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
|
||||
default='')
|
||||
if video_id_json:
|
||||
entries = [
|
||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
||||
for vid in self._parse_json(video_id_json, video_id)]
|
||||
return self.playlist_result(entries, video_id)
|
||||
|
||||
return self.playlist_result(entries, video_id)
|
||||
# Single Video?
|
||||
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
|
||||
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
||||
else:
|
||||
_, info_dict = self._extract_from_url(
|
||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
||||
|
@@ -31,7 +31,13 @@ class FranceCultureIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_data = extract_attributes(self._search_regex(
|
||||
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
|
||||
r'''(?sx)
|
||||
(?:
|
||||
</h1>|
|
||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||
).*?
|
||||
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
||||
''',
|
||||
webpage, 'video data'))
|
||||
|
||||
video_url = video_data['data-asset-source']
|
||||
|
@@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
|
||||
'ext': 'mp3',
|
||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'upload_date': '20160907',
|
||||
},
|
||||
}
|
||||
@@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||
|
||||
upload_date_str = self._search_regex(
|
||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||
@@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'formats': [{
|
||||
'url': video_url,
|
||||
|
@@ -17,6 +17,7 @@ from ..utils import (
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
@@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
is_live = None
|
||||
|
||||
formats = []
|
||||
for video in info['videos']:
|
||||
if video['statut'] != 'ONLINE':
|
||||
videos = []
|
||||
|
||||
for video in (info.get('videos') or []):
|
||||
if video.get('statut') != 'ONLINE':
|
||||
continue
|
||||
video_url = video['url']
|
||||
if not video.get('url'):
|
||||
continue
|
||||
videos.append(video)
|
||||
|
||||
if not videos:
|
||||
for device_type in ['desktop', 'mobile']:
|
||||
fallback_info = self._download_json(
|
||||
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
||||
'device_type': device_type,
|
||||
'browser': 'chrome',
|
||||
}, fatal=False)
|
||||
|
||||
if fallback_info and fallback_info.get('video'):
|
||||
videos.append(fallback_info['video'])
|
||||
|
||||
formats = []
|
||||
for video in videos:
|
||||
video_url = video.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
if is_live is None:
|
||||
is_live = (try_get(
|
||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
||||
bool) is True) or '/live.francetv.fr/' in video_url
|
||||
format_id = video['format']
|
||||
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
|
||||
or video.get('is_live') is True
|
||||
or '/live.francetv.fr/' in video_url)
|
||||
format_id = video.get('format')
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
if georestricted:
|
||||
@@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
|
||||
sign(video_url, format_id), video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||
fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = info['titre']
|
||||
@@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'description': clean_html(info.get('synopsis')),
|
||||
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||
'is_live': is_live,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
@@ -60,6 +60,9 @@ from .tnaflix import TNAFlixNetworkEmbedIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .redtube import RedTubeIE
|
||||
from .tube8 import Tube8IE
|
||||
from .mofosex import MofosexEmbedIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .youporn import YouPornIE
|
||||
from .vimeo import VimeoIE
|
||||
from .dailymotion import DailymotionIE
|
||||
from .dailymail import DailyMailIE
|
||||
@@ -88,6 +91,7 @@ from .piksel import PikselIE
|
||||
from .videa import VideaIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .ustream import UstreamIE
|
||||
from .arte import ArteTVEmbedIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
@@ -838,7 +842,7 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# MTVSercices embed
|
||||
# MTVServices embed
|
||||
{
|
||||
'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
|
||||
'md5': 'ca1aef97695ef2c1d6973256a57e5252',
|
||||
@@ -1705,6 +1709,15 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
},
|
||||
{
|
||||
# multiple kaltura embeds, nsfw
|
||||
'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
|
||||
'info_dict': {
|
||||
'id': 'kamila-avec-video-jaime-sadomie',
|
||||
'title': "Kamila avec vídeo “J'aime sadomie”",
|
||||
},
|
||||
'playlist_count': 8,
|
||||
},
|
||||
{
|
||||
# Non-standard Vimeo embed
|
||||
'url': 'https://openclassrooms.com/courses/understanding-the-web',
|
||||
@@ -2098,6 +2111,9 @@ class GenericIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||
'upload_date': '20170909',
|
||||
'timestamp': 1504915200,
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
@@ -2284,7 +2300,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
if head_response is not False:
|
||||
# Check for redirect
|
||||
new_url = compat_str(head_response.geturl())
|
||||
new_url = head_response.geturl()
|
||||
if url != new_url:
|
||||
self.report_following_redirect(new_url)
|
||||
if force_videoid:
|
||||
@@ -2384,12 +2400,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=compat_str(full_response.geturl())),
|
||||
xspf_base_url=full_response.geturl()),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'] = self._parse_mpd_formats(
|
||||
doc,
|
||||
mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
self._sort_formats(info_dict['formats'])
|
||||
return info_dict
|
||||
@@ -2533,15 +2549,21 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
|
||||
|
||||
# Look for Teachable embeds, must be before Wistia
|
||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||
if teachable_url:
|
||||
return self.url_result(teachable_url)
|
||||
|
||||
# Look for embedded Wistia player
|
||||
wistia_url = WistiaIE._extract_url(webpage)
|
||||
if wistia_url:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': self._proto_relative_url(wistia_url),
|
||||
'ie_key': WistiaIE.ie_key(),
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
wistia_urls = WistiaIE._extract_urls(webpage)
|
||||
if wistia_urls:
|
||||
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
|
||||
for entry in playlist['entries']:
|
||||
entry.update({
|
||||
'_type': 'url_transparent',
|
||||
'uploader': video_uploader,
|
||||
})
|
||||
return playlist
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
@@ -2706,6 +2728,21 @@ class GenericIE(InfoExtractor):
|
||||
if tube8_urls:
|
||||
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
|
||||
|
||||
# Look for embedded Mofosex player
|
||||
mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
|
||||
if mofosex_urls:
|
||||
return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
|
||||
|
||||
# Look for embedded Spankwire player
|
||||
spankwire_urls = SpankwireIE._extract_urls(webpage)
|
||||
if spankwire_urls:
|
||||
return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
|
||||
|
||||
# Look for embedded YouPorn player
|
||||
youporn_urls = YouPornIE._extract_urls(webpage)
|
||||
if youporn_urls:
|
||||
return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
|
||||
|
||||
# Look for embedded Tvigle player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
|
||||
@@ -2724,11 +2761,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||
|
||||
# Look for embedded arte.tv player
|
||||
mobj = re.search(
|
||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
||||
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
|
||||
if arte_urls:
|
||||
return self.playlist_from_matches(arte_urls, video_id, video_title)
|
||||
|
||||
# Look for embedded francetv player
|
||||
mobj = re.search(
|
||||
@@ -2817,9 +2852,12 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
|
||||
kaltura_urls = KalturaIE._extract_urls(webpage)
|
||||
if kaltura_urls:
|
||||
return self.playlist_from_matches(
|
||||
kaltura_urls, video_id, video_title,
|
||||
getter=lambda x: smuggle_url(x, {'source_url': url}),
|
||||
ie=KalturaIE.ie_key())
|
||||
|
||||
# Look for EaglePlatform embeds
|
||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||
@@ -2960,7 +2998,7 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for VODPlatform embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
@@ -3137,10 +3175,6 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
|
||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||
if teachable_url:
|
||||
return self.url_result(teachable_url)
|
||||
|
||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||
if indavideo_urls:
|
||||
return self.playlist_from_matches(
|
||||
|
@@ -13,10 +13,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class GiantBombIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
|
||||
'md5': 'c8ea694254a59246a42831155dec57ac',
|
||||
'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
|
||||
'info_dict': {
|
||||
'id': '2300-9782',
|
||||
'display_id': 'quick-look-destiny-the-dark-below',
|
||||
@@ -26,7 +26,10 @@ class GiantBombIE(InfoExtractor):
|
||||
'duration': 2399,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_parse_qs
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@@ -38,21 +40,10 @@ class GoogleDriveIE(InfoExtractor):
|
||||
# video can't be watched anonymously due to view count limit reached,
|
||||
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||
'info_dict': {
|
||||
'id': '0B-vUyvmDLdWDcEt4WjBqcmI2XzQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Annabelle Creation (2017)- Z.V1 [TH].MP4',
|
||||
}
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# video id is longer than 28 characters
|
||||
'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
|
||||
'info_dict': {
|
||||
'id': '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andreea Banica feat Smiley - Hooky Song (Official Video).mp4',
|
||||
'duration': 189,
|
||||
},
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.google.com/open?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
@@ -171,23 +162,21 @@ class GoogleDriveIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'http://docs.google.com/file/d/%s' % video_id, video_id)
|
||||
video_info = compat_parse_qs(self._download_webpage(
|
||||
'https://drive.google.com/get_video_info',
|
||||
video_id, query={'docid': video_id}))
|
||||
|
||||
title = self._search_regex(
|
||||
r'"title"\s*,\s*"([^"]+)', webpage, 'title',
|
||||
default=None) or self._og_search_title(webpage)
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds',
|
||||
default=None))
|
||||
def get_value(key):
|
||||
return try_get(video_info, lambda x: x[key][0])
|
||||
|
||||
reason = get_value('reason')
|
||||
title = get_value('title')
|
||||
if not title and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
formats = []
|
||||
fmt_stream_map = self._search_regex(
|
||||
r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt stream map', default='').split(',')
|
||||
fmt_list = self._search_regex(
|
||||
r'"fmt_list"\s*,\s*"([^"]+)', webpage,
|
||||
'fmt_list', default='').split(',')
|
||||
fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
|
||||
fmt_list = (get_value('fmt_list') or '').split(',')
|
||||
if fmt_stream_map and fmt_list:
|
||||
resolutions = {}
|
||||
for fmt in fmt_list:
|
||||
@@ -220,19 +209,27 @@ class GoogleDriveIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'export': 'download',
|
||||
})
|
||||
urlh = self._request_webpage(
|
||||
source_url, video_id, note='Requesting source file',
|
||||
errnote='Unable to request source file', fatal=False)
|
||||
|
||||
def request_source_file(source_url, kind):
|
||||
return self._request_webpage(
|
||||
source_url, video_id, note='Requesting %s file' % kind,
|
||||
errnote='Unable to request %s file' % kind, fatal=False)
|
||||
urlh = request_source_file(source_url, 'source')
|
||||
if urlh:
|
||||
def add_source_format(src_url):
|
||||
def add_source_format(urlh):
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
# Use redirect URLs as download URLs in order to calculate
|
||||
# correct cookies in _calc_cookies.
|
||||
# Using original URLs may result in redirect loop due to
|
||||
# google.com's cookies mistakenly used for googleusercontent.com
|
||||
# redirect URLs (see #23919).
|
||||
'url': urlh.geturl(),
|
||||
'ext': determine_ext(title, 'mp4').lower(),
|
||||
'format_id': 'source',
|
||||
'quality': 1,
|
||||
})
|
||||
if urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(source_url)
|
||||
add_source_format(urlh)
|
||||
else:
|
||||
confirmation_webpage = self._webpage_read_content(
|
||||
urlh, url, video_id, note='Downloading confirmation page',
|
||||
@@ -242,23 +239,21 @@ class GoogleDriveIE(InfoExtractor):
|
||||
r'confirm=([^&"\']+)', confirmation_webpage,
|
||||
'confirmation code', fatal=False)
|
||||
if confirm:
|
||||
add_source_format(update_url_query(source_url, {
|
||||
confirmed_source_url = update_url_query(source_url, {
|
||||
'confirm': confirm,
|
||||
}))
|
||||
})
|
||||
urlh = request_source_file(confirmed_source_url, 'confirmed source')
|
||||
if urlh and urlh.headers.get('Content-Disposition'):
|
||||
add_source_format(urlh)
|
||||
|
||||
if not formats:
|
||||
reason = self._search_regex(
|
||||
r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
|
||||
if reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
if not formats and reason:
|
||||
raise ExtractorError(reason, expected=True)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
hl = self._search_regex(
|
||||
r'"hl"\s*,\s*"([^"]+)', webpage, 'hl', default=None)
|
||||
hl = get_value('hl')
|
||||
subtitles_id = None
|
||||
ttsurl = self._search_regex(
|
||||
r'"ttsurl"\s*,\s*"([^"]+)', webpage, 'ttsurl', default=None)
|
||||
ttsurl = get_value('ttsurl')
|
||||
if ttsurl:
|
||||
# the video Id for subtitles will be the last value in the ttsurl
|
||||
# query string
|
||||
@@ -268,8 +263,8 @@ class GoogleDriveIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'duration': duration,
|
||||
'thumbnail': 'https://drive.google.com/thumbnail?id=' + video_id,
|
||||
'duration': int_or_none(get_value('length_seconds')),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
|
||||
'automatic_captions': self.extract_automatic_captions(
|
||||
|
@@ -1,12 +1,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
remove_end,
|
||||
determine_ext,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -14,15 +13,21 @@ class HellPornoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
|
||||
'md5': '1fee339c610d2049699ef2aa699439f1',
|
||||
'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
|
||||
'info_dict': {
|
||||
'id': '149116',
|
||||
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dixie is posing with naked ass very erotic',
|
||||
'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
|
||||
'categories': list,
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'duration': 240,
|
||||
'timestamp': 1398762720,
|
||||
'upload_date': '20140429',
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://hellporno.net/v/186271/',
|
||||
'only_matching': True,
|
||||
@@ -36,40 +41,36 @@ class HellPornoIE(InfoExtractor):
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
|
||||
|
||||
flashvars = self._parse_json(self._search_regex(
|
||||
r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
|
||||
display_id, transform_source=js_to_json)
|
||||
info = self._parse_html5_media_entries(url, webpage, display_id)[0]
|
||||
self._sort_formats(info['formats'])
|
||||
|
||||
video_id = flashvars.get('video_id')
|
||||
thumbnail = flashvars.get('preview_url')
|
||||
ext = determine_ext(flashvars.get('postfix'), 'mp4')
|
||||
video_id = self._search_regex(
|
||||
(r'chs_object\s*=\s*["\'](\d+)',
|
||||
r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
|
||||
default=display_id)
|
||||
description = self._search_regex(
|
||||
r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
|
||||
'description', fatal=False)
|
||||
categories = [
|
||||
c.strip()
|
||||
for c in self._html_search_meta(
|
||||
'keywords', webpage, 'categories', default='').split(',')
|
||||
if c.strip()]
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'video:duration', webpage, fatal=False))
|
||||
timestamp = unified_timestamp(self._og_search_property(
|
||||
'video:release_date', webpage, fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
|
||||
|
||||
formats = []
|
||||
for video_url_key in ['video_url', 'video_alt_url']:
|
||||
video_url = flashvars.get(video_url_key)
|
||||
if not video_url:
|
||||
continue
|
||||
video_text = flashvars.get('%s_text' % video_url_key)
|
||||
fmt = {
|
||||
'url': video_url,
|
||||
'ext': ext,
|
||||
'format_id': video_text,
|
||||
}
|
||||
m = re.search(r'^(?P<height>\d+)[pP]', video_text)
|
||||
if m:
|
||||
fmt['height'] = int(m.group('height'))
|
||||
formats.append(fmt)
|
||||
self._sort_formats(formats)
|
||||
|
||||
categories = self._html_search_meta(
|
||||
'keywords', webpage, 'categories', default='').split(',')
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'categories': categories,
|
||||
'duration': duration,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'age_limit': 18,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@@ -1,5 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@@ -8,6 +10,7 @@ from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
qualities,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@@ -15,15 +18,16 @@ from ..utils import (
|
||||
class ImdbIE(InfoExtractor):
|
||||
IE_NAME = 'imdb'
|
||||
IE_DESC = 'Internet Movie Database trailers'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||
'info_dict': {
|
||||
'id': '2524815897',
|
||||
'ext': 'mp4',
|
||||
'title': 'No. 2 from Ice Age: Continental Drift (2012)',
|
||||
'title': 'No. 2',
|
||||
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
|
||||
'duration': 152,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||
@@ -47,21 +51,23 @@ class ImdbIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
|
||||
video_metadata = self._parse_json(self._search_regex(
|
||||
r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
|
||||
'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
|
||||
query={
|
||||
'key': base64.b64encode(json.dumps({
|
||||
'type': 'VIDEO_PLAYER',
|
||||
'subType': 'FORCE_LEGACY',
|
||||
'id': 'vi%s' % video_id,
|
||||
}).encode()).decode(),
|
||||
})[0]
|
||||
|
||||
quality = qualities(('SD', '480p', '720p', '1080p'))
|
||||
formats = []
|
||||
for encoding in video_metadata.get('encodings', []):
|
||||
for encoding in data['videoLegacyEncodings']:
|
||||
if not encoding or not isinstance(encoding, dict):
|
||||
continue
|
||||
video_url = url_or_none(encoding.get('videoUrl'))
|
||||
video_url = url_or_none(encoding.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = mimetype2ext(encoding.get(
|
||||
@@ -69,7 +75,7 @@ class ImdbIE(InfoExtractor):
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
preference=1, m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
format_id = encoding.get('definition')
|
||||
formats.append({
|
||||
@@ -80,13 +86,33 @@ class ImdbIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://www.imdb.com/video/vi' + video_id, video_id)
|
||||
video_metadata = self._parse_json(self._search_regex(
|
||||
r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
|
||||
'video metadata'), video_id)
|
||||
|
||||
video_info = video_metadata.get('VIDEO_INFO')
|
||||
if video_info and isinstance(video_info, dict):
|
||||
info = try_get(
|
||||
video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
|
||||
else:
|
||||
info = {}
|
||||
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title',
|
||||
default=None) or info['videoTitle']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'alt_title': info.get('videoSubTitle'),
|
||||
'formats': formats,
|
||||
'description': video_metadata.get('description'),
|
||||
'thumbnail': video_metadata.get('slate', {}).get('url'),
|
||||
'duration': parse_duration(video_metadata.get('duration')),
|
||||
'description': info.get('videoDescription'),
|
||||
'thumbnail': url_or_none(try_get(
|
||||
video_metadata, lambda x: x['videoSlate']['source'])),
|
||||
'duration': parse_duration(info.get('videoRuntime')),
|
||||
}
|
||||
|
||||
|
||||
|
@@ -58,7 +58,7 @@ class IndavideoEmbedIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video = self._download_json(
|
||||
'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
|
||||
'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
|
||||
video_id)['data']
|
||||
|
||||
title = video['title']
|
||||
|
@@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||
|
||||
def _extract_rtmp_video(self, webpage):
|
||||
# The server URL is hardcoded
|
||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
||||
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||
|
||||
# Extract video URL
|
||||
encoded_id = self._search_regex(
|
||||
@@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
|
||||
return [{
|
||||
'format_id': 'http_video',
|
||||
'url': http_video_url,
|
||||
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||
}]
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
||||
# base URL is found in the Location header in the response returned by
|
||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||
|
||||
# audio file seem to be missing some times even if there is a download link
|
||||
|
@@ -16,12 +16,22 @@ class IPrimaIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://play.iprima.cz/gondici-s-r-o-33',
|
||||
'url': 'https://prima.iprima.cz/particka/92-epizoda',
|
||||
'info_dict': {
|
||||
'id': 'p136534',
|
||||
'id': 'p51388',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gondíci s. r. o. (34)',
|
||||
'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
|
||||
'title': 'Partička (92)',
|
||||
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cnn.iprima.cz/videa/70-epizoda',
|
||||
'info_dict': {
|
||||
'id': 'p681554',
|
||||
'ext': 'mp4',
|
||||
'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # m3u8 download
|
||||
@@ -68,9 +78,16 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._search_regex(
|
||||
r'<h1>([^<]+)', webpage, 'title')
|
||||
|
||||
video_id = self._search_regex(
|
||||
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
|
||||
r'data-product="([^"]+)">'),
|
||||
r'data-product="([^"]+)">',
|
||||
r'id=["\']player-(p\d+)"',
|
||||
r'playerId\s*:\s*["\']player-(p\d+)',
|
||||
r'\bvideos\s*=\s*["\'](p\d+)'),
|
||||
webpage, 'real id')
|
||||
|
||||
playerpage = self._download_webpage(
|
||||
@@ -125,8 +142,8 @@ class IPrimaIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
}
|
||||
|
@@ -150,7 +150,7 @@ class IqiyiSDKInterpreter(object):
|
||||
elif function in other_functions:
|
||||
other_functions[function]()
|
||||
else:
|
||||
raise ExtractorError('Unknown funcion %s' % function)
|
||||
raise ExtractorError('Unknown function %s' % function)
|
||||
|
||||
return sdk.target
|
||||
|
||||
|
@@ -239,7 +239,7 @@ class IviCompilationIE(InfoExtractor):
|
||||
self.url_result(
|
||||
'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
|
||||
for serie in re.findall(
|
||||
r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
|
||||
r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
@@ -1,68 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class JpopsukiIE(InfoExtractor):
|
||||
IE_NAME = 'jpopsuki.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
|
||||
'md5': '88018c0c1a9b1387940e90ec9e7e198e',
|
||||
'info_dict': {
|
||||
'id': '00be659d23b0b40508169cdee4545771',
|
||||
'ext': 'mp4',
|
||||
'title': 'ayumi hamasaki - evolution',
|
||||
'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
|
||||
'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
|
||||
'uploader': 'plama_chan',
|
||||
'uploader_id': '404',
|
||||
'upload_date': '20121101'
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
|
||||
r'<source src="(.*?)" type', webpage, 'video url')
|
||||
|
||||
video_title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<li>from: <a href="/user/view/user/(.*?)/uid/',
|
||||
webpage, 'video uploader', fatal=False)
|
||||
uploader_id = self._html_search_regex(
|
||||
r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
|
||||
webpage, 'video uploader_id', fatal=False)
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
|
||||
fatal=False))
|
||||
view_count_str = self._html_search_regex(
|
||||
r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
|
||||
fatal=False)
|
||||
comment_count_str = self._html_search_regex(
|
||||
r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
|
||||
fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': video_title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'upload_date': upload_date,
|
||||
'view_count': int_or_none(view_count_str),
|
||||
'comment_count': int_or_none(comment_count_str),
|
||||
}
|
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unsmuggle_url
|
||||
|
||||
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
@@ -32,10 +33,14 @@ class JWPlatformIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
|
||||
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
video_id = self._match_id(url)
|
||||
json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
|
||||
return self._parse_jwplayer_data(json_data, video_id)
|
||||
|
@@ -113,9 +113,14 @@ class KalturaIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = KalturaIE._extract_urls(webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||
mobj = (
|
||||
re.search(
|
||||
finditer = (
|
||||
re.finditer(
|
||||
r"""(?xs)
|
||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||
\{.*?
|
||||
@@ -124,7 +129,7 @@ class KalturaIE(InfoExtractor):
|
||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||
""", webpage)
|
||||
or re.search(
|
||||
or re.finditer(
|
||||
r'''(?xs)
|
||||
(?P<q1>["'])
|
||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||
@@ -138,7 +143,7 @@ class KalturaIE(InfoExtractor):
|
||||
)
|
||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||
''', webpage)
|
||||
or re.search(
|
||||
or re.finditer(
|
||||
r'''(?xs)
|
||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
||||
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||
@@ -148,7 +153,8 @@ class KalturaIE(InfoExtractor):
|
||||
(?P=q1)
|
||||
''', webpage)
|
||||
)
|
||||
if mobj:
|
||||
urls = []
|
||||
for mobj in finditer:
|
||||
embed_info = mobj.groupdict()
|
||||
for k, v in embed_info.items():
|
||||
if v:
|
||||
@@ -160,7 +166,8 @@ class KalturaIE(InfoExtractor):
|
||||
webpage)
|
||||
if service_mobj:
|
||||
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
||||
return url
|
||||
urls.append(url)
|
||||
return urls
|
||||
|
||||
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
|
||||
params = actions[0]
|
||||
|
@@ -64,7 +64,7 @@ class KUSIIE(InfoExtractor):
|
||||
duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
|
||||
description = xpath_text(doc, 'ABSTRACT')
|
||||
thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
|
||||
createtion_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
|
||||
|
||||
quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
|
||||
formats = []
|
||||
@@ -84,5 +84,5 @@ class KUSIIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': createtion_time,
|
||||
'timestamp': creation_time,
|
||||
}
|
||||
|
91
youtube_dl/extractor/lbry.py
Normal file
91
youtube_dl/extractor/lbry.py
Normal file
@@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class LBRYIE(InfoExtractor):
|
||||
IE_NAME = 'lbry.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])'
|
||||
_TESTS = [{
|
||||
# Video
|
||||
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
|
||||
'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
|
||||
'info_dict': {
|
||||
'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
|
||||
'ext': 'mp4',
|
||||
'title': 'First day in LBRY? Start HERE!',
|
||||
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
|
||||
'timestamp': 1595694354,
|
||||
'upload_date': '20200725',
|
||||
}
|
||||
}, {
|
||||
# Audio
|
||||
'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
|
||||
'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
|
||||
'info_dict': {
|
||||
'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
|
||||
'ext': 'mp3',
|
||||
'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
|
||||
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
|
||||
'timestamp': 1591312601,
|
||||
'upload_date': '20200604',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api_proxy(self, method, display_id, params):
|
||||
return self._download_json(
|
||||
'https://api.lbry.tv/api/v1/proxy', display_id,
|
||||
headers={'Content-Type': 'application/json-rpc'},
|
||||
data=json.dumps({
|
||||
'method': method,
|
||||
'params': params,
|
||||
}).encode())['result']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url).replace(':', '#')
|
||||
uri = 'lbry://' + display_id
|
||||
result = self._call_api_proxy(
|
||||
'resolve', display_id, {'urls': [uri]})[uri]
|
||||
result_value = result['value']
|
||||
if result_value.get('stream_type') not in ('video', 'audio'):
|
||||
raise ExtractorError('Unsupported URL', expected=True)
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', display_id, {'uri': uri})['streaming_url']
|
||||
source = result_value.get('source') or {}
|
||||
media = result_value.get('video') or result_value.get('audio') or {}
|
||||
signing_channel = result_value.get('signing_channel') or {}
|
||||
|
||||
return {
|
||||
'id': result['claim_id'],
|
||||
'title': result_value['title'],
|
||||
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
|
||||
'description': result_value.get('description'),
|
||||
'license': result_value.get('license'),
|
||||
'timestamp': int_or_none(result.get('timestamp')),
|
||||
'tags': result_value.get('tags'),
|
||||
'width': int_or_none(media.get('width')),
|
||||
'height': int_or_none(media.get('height')),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'channel': signing_channel.get('name'),
|
||||
'channel_id': signing_channel.get('claim_id'),
|
||||
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
|
||||
'filesize': int_or_none(source.get('size')),
|
||||
'url': streaming_url,
|
||||
}
|
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
@@ -36,7 +35,7 @@ class LecturioBaseIE(InfoExtractor):
|
||||
self._LOGIN_URL, None, 'Downloading login popup')
|
||||
|
||||
def is_logged(url_handle):
|
||||
return self._LOGIN_URL not in compat_str(url_handle.geturl())
|
||||
return self._LOGIN_URL not in url_handle.geturl()
|
||||
|
||||
# Already logged in
|
||||
if is_logged(urlh):
|
||||
|
@@ -2,23 +2,24 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
unescapeHTML,
|
||||
parse_duration,
|
||||
get_element_by_class,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class LEGOIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
|
||||
'md5': 'f34468f176cfd76488767fc162c405fa',
|
||||
'info_dict': {
|
||||
'id': '55492d823b1b4d5e985787fa8c2973b1',
|
||||
'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
|
||||
'ext': 'mp4',
|
||||
'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
|
||||
'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
|
||||
@@ -26,103 +27,123 @@ class LEGOIE(InfoExtractor):
|
||||
}, {
|
||||
# geo-restricted but the contentUrl contain a valid url
|
||||
'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
|
||||
'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
|
||||
'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
|
||||
'info_dict': {
|
||||
'id': '13bdc2299ab24d9685701a915b3d71e7',
|
||||
'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aflevering 20 - Helden van het koninkrijk',
|
||||
'title': 'Aflevering 20: Helden van het koninkrijk',
|
||||
'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
|
||||
'age_limit': 5,
|
||||
},
|
||||
}, {
|
||||
# special characters in title
|
||||
'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
|
||||
# with subtitle
|
||||
'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
|
||||
'info_dict': {
|
||||
'id': '9685ee9d12e84ff38e84b4e3d0db533d',
|
||||
'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
|
||||
'ext': 'mp4',
|
||||
'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
|
||||
'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
|
||||
'title': 'De kleine puppy',
|
||||
'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
|
||||
'age_limit': 1,
|
||||
'subtitles': {
|
||||
'nl': [{
|
||||
'ext': 'srt',
|
||||
'url': r're:^https://.+\.srt$',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_BITRATES = [256, 512, 1024, 1536, 2560]
|
||||
_QUALITIES = {
|
||||
'Lowest': (64, 180, 320),
|
||||
'Low': (64, 270, 480),
|
||||
'Medium': (96, 360, 640),
|
||||
'High': (128, 540, 960),
|
||||
'Highest': (128, 720, 1280),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
locale, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = get_element_by_class('video-header', webpage).strip()
|
||||
progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
|
||||
streaming_base = 'http://legoprod-f.akamaihd.net/'
|
||||
content_url = self._html_search_meta('contentUrl', webpage)
|
||||
path = self._search_regex(
|
||||
r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
|
||||
content_url, 'video path', default=None)
|
||||
if not path:
|
||||
player_url = self._proto_relative_url(self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
|
||||
webpage, 'player url', default=None))
|
||||
if not player_url:
|
||||
base_url = self._proto_relative_url(self._search_regex(
|
||||
r'data-baseurl="([^"]+)"', webpage, 'base url',
|
||||
default='http://www.lego.com/%s/mediaplayer/video/' % locale))
|
||||
player_url = base_url + video_id
|
||||
player_webpage = self._download_webpage(player_url, video_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r"video='([^']+)'", player_webpage, 'video data')), video_id)
|
||||
progressive_base = self._search_regex(
|
||||
r'data-video-progressive-url="([^"]+)"',
|
||||
player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
|
||||
streaming_base = self._search_regex(
|
||||
r'data-video-streaming-url="([^"]+)"',
|
||||
player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
|
||||
item_id = video_data['ItemId']
|
||||
countries = [locale.split('-')[1].upper()]
|
||||
self._initialize_geo_bypass({
|
||||
'countries': countries,
|
||||
})
|
||||
|
||||
net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
|
||||
base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
|
||||
path = '/'.join([net_storage_path, base_path])
|
||||
streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES))
|
||||
try:
|
||||
item = self._download_json(
|
||||
# https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
|
||||
'https://services.slingshot.lego.com/mediaplayer/v2',
|
||||
video_id, query={
|
||||
'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
|
||||
}, headers=self.geo_verification_headers())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
|
||||
self.raise_geo_restricted(countries=countries)
|
||||
raise
|
||||
|
||||
formats = self._extract_akamai_formats(
|
||||
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
|
||||
formats))
|
||||
if len(m3u8_formats) == len(self._BITRATES):
|
||||
self._sort_formats(m3u8_formats)
|
||||
for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
|
||||
progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
|
||||
mp4_f = m3u8_format.copy()
|
||||
mp4_f.update({
|
||||
'url': progressive_base_url + 'mp4',
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
|
||||
'protocol': 'http',
|
||||
})
|
||||
web_f = {
|
||||
'url': progressive_base_url + 'webm',
|
||||
'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
|
||||
'width': m3u8_format['width'],
|
||||
'height': m3u8_format['height'],
|
||||
'tbr': m3u8_format.get('tbr'),
|
||||
'ext': 'webm',
|
||||
video = item['Video']
|
||||
video_id = video['Id']
|
||||
title = video['Title']
|
||||
|
||||
q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
|
||||
formats = []
|
||||
for video_source in item.get('VideoFormats', []):
|
||||
video_source_url = video_source.get('Url')
|
||||
if not video_source_url:
|
||||
continue
|
||||
video_source_format = video_source.get('Format')
|
||||
if video_source_format == 'F4M':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_source_url, video_id,
|
||||
f4m_id=video_source_format, fatal=False))
|
||||
elif video_source_format == 'M3U8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_source_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=video_source_format, fatal=False))
|
||||
else:
|
||||
video_source_quality = video_source.get('Quality')
|
||||
format_id = []
|
||||
for v in (video_source_format, video_source_quality):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'quality': q(video_source_quality),
|
||||
'url': video_source_url,
|
||||
}
|
||||
formats.extend([web_f, mp4_f])
|
||||
else:
|
||||
for bitrate in self._BITRATES:
|
||||
for ext in ('web', 'mp4'):
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (ext, bitrate),
|
||||
'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
|
||||
'tbr': bitrate,
|
||||
'ext': ext,
|
||||
})
|
||||
quality = self._QUALITIES.get(video_source_quality)
|
||||
if quality:
|
||||
f.update({
|
||||
'abr': quality[0],
|
||||
'height': quality[1],
|
||||
'width': quality[2],
|
||||
}),
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
sub_file_id = video.get('SubFileId')
|
||||
if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
|
||||
net_storage_path = video.get('NetstoragePath')
|
||||
invariant_id = video.get('InvariantId')
|
||||
video_file_id = video.get('VideoFileId')
|
||||
video_version = video.get('VideoVersion')
|
||||
if net_storage_path and invariant_id and video_file_id and video_version:
|
||||
subtitles.setdefault(locale[:2], []).append({
|
||||
'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'thumbnail': self._html_search_meta('thumbnail', webpage),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
'description': video.get('Description'),
|
||||
'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
|
||||
'duration': int_or_none(video.get('Length')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'age_limit': int_or_none(video.get('AgeFrom')),
|
||||
'season': video.get('SeasonTitle'),
|
||||
'season_number': int_or_none(video.get('Season')) or None,
|
||||
'episode_number': int_or_none(video.get('Episode')) or None,
|
||||
}
|
||||
|
@@ -18,7 +18,6 @@ from ..utils import (
|
||||
|
||||
class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage, source_url):
|
||||
@@ -70,7 +69,8 @@ class LimelightBaseIE(InfoExtractor):
|
||||
try:
|
||||
return self._download_json(
|
||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
|
||||
item_id, 'Downloading PlaylistService %s JSON' % method,
|
||||
fatal=fatal, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||
@@ -79,22 +79,22 @@ class LimelightBaseIE(InfoExtractor):
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
def _call_api(self, organization_id, item_id, method):
|
||||
return self._download_json(
|
||||
self._API_URL % (organization_id, self._API_PATH, item_id, method),
|
||||
item_id, 'Downloading API %s JSON' % method)
|
||||
|
||||
def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
|
||||
def _extract(self, item_id, pc_method, mobile_method, referer=None):
|
||||
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
|
||||
metadata = self._call_api(pc['orgId'], item_id, meta_method)
|
||||
mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
|
||||
return pc, mobile, metadata
|
||||
mobile = self._call_playlist_service(
|
||||
item_id, mobile_method, fatal=False, referer=referer)
|
||||
return pc, mobile
|
||||
|
||||
def _extract_info(self, pc, mobile, i, referer):
|
||||
get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
|
||||
pc_item = get_item(pc, 'playlistItems')
|
||||
mobile_item = get_item(mobile, 'mediaList')
|
||||
video_id = pc_item.get('mediaId') or mobile_item['mediaId']
|
||||
title = pc_item.get('title') or mobile_item['title']
|
||||
|
||||
def _extract_info(self, streams, mobile_urls, properties):
|
||||
video_id = properties['media_id']
|
||||
formats = []
|
||||
urls = []
|
||||
for stream in streams:
|
||||
for stream in pc_item.get('streams', []):
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url or stream.get('drmProtected') or stream_url in urls:
|
||||
continue
|
||||
@@ -155,7 +155,7 @@ class LimelightBaseIE(InfoExtractor):
|
||||
})
|
||||
formats.append(fmt)
|
||||
|
||||
for mobile_url in mobile_urls:
|
||||
for mobile_url in mobile_item.get('mobileUrls', []):
|
||||
media_url = mobile_url.get('mobileUrl')
|
||||
format_id = mobile_url.get('targetMediaPlatform')
|
||||
if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
|
||||
@@ -179,54 +179,34 @@ class LimelightBaseIE(InfoExtractor):
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = properties['title']
|
||||
description = properties.get('description')
|
||||
timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
|
||||
duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
|
||||
filesize = int_or_none(properties.get('total_storage_in_bytes'))
|
||||
categories = [properties.get('category')]
|
||||
tags = properties.get('tags', [])
|
||||
thumbnails = [{
|
||||
'url': thumbnail['url'],
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
} for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
|
||||
|
||||
subtitles = {}
|
||||
for caption in properties.get('captions', []):
|
||||
lang = caption.get('language_code')
|
||||
subtitles_url = caption.get('url')
|
||||
if lang and subtitles_url:
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': subtitles_url,
|
||||
})
|
||||
closed_captions_url = properties.get('closed_captions_url')
|
||||
if closed_captions_url:
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': closed_captions_url,
|
||||
'ext': 'ttml',
|
||||
})
|
||||
for flag in mobile_item.get('flags'):
|
||||
if flag == 'ClosedCaptions':
|
||||
closed_captions = self._call_playlist_service(
|
||||
video_id, 'getClosedCaptionsDetailsByMediaId',
|
||||
False, referer) or []
|
||||
for cc in closed_captions:
|
||||
cc_url = cc.get('webvttFileUrl')
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': cc_url,
|
||||
})
|
||||
break
|
||||
|
||||
get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'description': get_meta('description'),
|
||||
'formats': formats,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'filesize': filesize,
|
||||
'categories': categories,
|
||||
'tags': tags,
|
||||
'thumbnails': thumbnails,
|
||||
'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
|
||||
'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _extract_info_helper(self, pc, mobile, i, metadata):
|
||||
return self._extract_info(
|
||||
try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
|
||||
try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
|
||||
metadata)
|
||||
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
|
||||
IE_NAME = 'limelight'
|
||||
@@ -251,8 +231,6 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
'description': 'md5:8005b944181778e313d95c1237ddb640',
|
||||
'thumbnail': r're:^https?://.*\.jpeg$',
|
||||
'duration': 144.23,
|
||||
'timestamp': 1244136834,
|
||||
'upload_date': '20090604',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@@ -268,30 +246,29 @@ class LimelightMediaIE(LimelightBaseIE):
|
||||
'title': '3Play Media Overview Video',
|
||||
'thumbnail': r're:^https?://.*\.jpeg$',
|
||||
'duration': 78.101,
|
||||
'timestamp': 1338929955,
|
||||
'upload_date': '20120605',
|
||||
'subtitles': 'mincount:9',
|
||||
# TODO: extract all languages that were accessible via API
|
||||
# 'subtitles': 'mincount:9',
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'media'
|
||||
_API_PATH = 'media'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
source_url = smuggled_data.get('source_url')
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
pc, mobile, metadata = self._extract(
|
||||
pc, mobile = self._extract(
|
||||
video_id, 'getPlaylistByMediaId',
|
||||
'getMobilePlaylistByMediaId', 'properties',
|
||||
smuggled_data.get('source_url'))
|
||||
'getMobilePlaylistByMediaId', source_url)
|
||||
|
||||
return self._extract_info_helper(pc, mobile, 0, metadata)
|
||||
return self._extract_info(pc, mobile, 0, source_url)
|
||||
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
|
||||
@@ -313,6 +290,7 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
'info_dict': {
|
||||
'id': 'ab6a524c379342f9b23642917020c082',
|
||||
'title': 'Javascript Sample Code',
|
||||
'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
@@ -320,22 +298,23 @@ class LimelightChannelIE(LimelightBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PLAYLIST_SERVICE_PATH = 'channel'
|
||||
_API_PATH = 'channels'
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
channel_id = self._match_id(url)
|
||||
source_url = smuggled_data.get('source_url')
|
||||
|
||||
pc, mobile, medias = self._extract(
|
||||
pc, mobile = self._extract(
|
||||
channel_id, 'getPlaylistByChannelId',
|
||||
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
|
||||
'media', smuggled_data.get('source_url'))
|
||||
source_url)
|
||||
|
||||
entries = [
|
||||
self._extract_info_helper(pc, mobile, i, medias['media_list'][i])
|
||||
for i in range(len(medias['media_list']))]
|
||||
self._extract_info(pc, mobile, i, source_url)
|
||||
for i in range(len(pc['playlistItems']))]
|
||||
|
||||
return self.playlist_result(entries, channel_id, pc['title'])
|
||||
return self.playlist_result(
|
||||
entries, channel_id, pc.get('title'), mobile.get('description'))
|
||||
|
||||
|
||||
class LimelightChannelListIE(LimelightBaseIE):
|
||||
@@ -368,10 +347,12 @@ class LimelightChannelListIE(LimelightBaseIE):
|
||||
def _real_extract(self, url):
|
||||
channel_list_id = self._match_id(url)
|
||||
|
||||
channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
|
||||
channel_list = self._call_playlist_service(
|
||||
channel_list_id, 'getMobileChannelListById')
|
||||
|
||||
entries = [
|
||||
self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
|
||||
for channel in channel_list['channelList']]
|
||||
|
||||
return self.playlist_result(entries, channel_list_id, channel_list['title'])
|
||||
return self.playlist_result(
|
||||
entries, channel_list_id, channel_list['title'])
|
||||
|
@@ -8,7 +8,6 @@ from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -99,7 +98,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
'sso': 'true',
|
||||
})
|
||||
|
||||
login_state_url = compat_str(urlh.geturl())
|
||||
login_state_url = urlh.geturl()
|
||||
|
||||
try:
|
||||
login_page = self._download_webpage(
|
||||
@@ -129,7 +128,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||
})
|
||||
|
||||
access_token = self._search_regex(
|
||||
r'access_token=([^=&]+)', compat_str(urlh.geturl()),
|
||||
r'access_token=([^=&]+)', urlh.geturl(),
|
||||
'access token')
|
||||
|
||||
self._download_webpage(
|
||||
|
@@ -5,28 +5,26 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
class LRTIE(InfoExtractor):
|
||||
IE_NAME = 'lrt.lt'
|
||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
|
||||
_TESTS = [{
|
||||
# m3u8 download
|
||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
||||
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
|
||||
'md5': '85cb2bb530f31d91a9c65b479516ade4',
|
||||
'info_dict': {
|
||||
'id': '54391',
|
||||
'id': '2000127261',
|
||||
'ext': 'mp4',
|
||||
'title': 'Septynios Kauno dienos',
|
||||
'description': 'md5:24d84534c7dc76581e59f5689462411a',
|
||||
'duration': 1783,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
|
||||
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
|
||||
'duration': 3035,
|
||||
'timestamp': 1604079000,
|
||||
'upload_date': '20201030',
|
||||
},
|
||||
}, {
|
||||
# direct mp3 download
|
||||
@@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_js_var(self, webpage, var_name, default):
|
||||
return self._search_regex(
|
||||
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
|
||||
webpage, var_name.replace('_', ' '), default, group=2)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
||||
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||
media = self._download_json(self._extract_js_var(
|
||||
webpage, 'media_info_url',
|
||||
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
|
||||
video_id, query={'url': media_url})
|
||||
jw_data = self._parse_jwplayer_data(
|
||||
media['playlist_item'], video_id, base_url=url)
|
||||
|
||||
formats = []
|
||||
for _, file_url in re.findall(
|
||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
||||
ext = determine_ext(file_url)
|
||||
if ext not in ('m3u8', 'mp3'):
|
||||
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||
|
||||
tags = []
|
||||
for tag in (media.get('tags') or []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
# mp3 served as m3u8 produces stuttered media file
|
||||
if ext == 'm3u8' and '.mp3' in file_url:
|
||||
continue
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
fatal=False))
|
||||
elif ext == 'mp3':
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
tags.append(tag_name)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
|
||||
webpage, 'duration', default=None, group='duration'))
|
||||
|
||||
view_count = int_or_none(self._html_search_regex(
|
||||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
|
||||
webpage, 'view count', fatal=False, group='count'))
|
||||
like_count = int_or_none(self._search_regex(
|
||||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
|
||||
webpage, 'like count', fatal=False, group='count'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
clean_info = {
|
||||
'description': clean_html(media.get('content')),
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||
|
@@ -128,6 +128,12 @@ class MailRuIE(InfoExtractor):
|
||||
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
headers = {}
|
||||
|
||||
video_key = self._get_cookies('https://my.mail.ru').get('video_key')
|
||||
if video_key:
|
||||
headers['Cookie'] = 'video_key=%s' % video_key.value
|
||||
|
||||
formats = []
|
||||
for f in video_data['videos']:
|
||||
video_url = f.get('url')
|
||||
@@ -140,6 +146,7 @@ class MailRuIE(InfoExtractor):
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'height': height,
|
||||
'http_headers': headers,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -1,14 +1,20 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import merge_dicts
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MallTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'md5': '1c4a37f080e1f3023103a7b43458e518',
|
||||
@@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
|
||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'ext': 'mp4',
|
||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
||||
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||
'duration': 216,
|
||||
'timestamp': 1538870400,
|
||||
'upload_date': '20181007',
|
||||
@@ -26,6 +32,9 @@ class MallTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -34,20 +43,46 @@ class MallTVIE(InfoExtractor):
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
|
||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
||||
video = self._parse_json(self._search_regex(
|
||||
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||
webpage, 'video object'), display_id)
|
||||
video_source = video['VideoSource']
|
||||
video_id = self._search_regex(
|
||||
SOURCE_RE, webpage, 'video id', group='id')
|
||||
r'/([\da-z]+)/index\b', video_source, 'video id')
|
||||
|
||||
media = self._parse_html5_media_entries(
|
||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for s in (video.get('Subtitles') or {}):
|
||||
s_url = s.get('Url')
|
||||
if not s_url:
|
||||
continue
|
||||
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||
'url': s_url,
|
||||
})
|
||||
|
||||
entity_counts = video.get('EntityCounts') or {}
|
||||
|
||||
def get_count(k):
|
||||
v = entity_counts.get(k + 's') or {}
|
||||
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return merge_dicts(media, info, {
|
||||
return merge_dicts({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
})
|
||||
'title': video.get('Title'),
|
||||
'description': clean_html(video.get('Description')),
|
||||
'thumbnail': video.get('ThumbnailUrl'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||
'view_count': get_count('View'),
|
||||
'like_count': get_count('Like'),
|
||||
'dislike_count': get_count('Dislike'),
|
||||
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||
'comment_count': get_count('Comment'),
|
||||
}, info)
|
||||
|
131
youtube_dl/extractor/medaltv.py
Normal file
131
youtube_dl/extractor/medaltv.py
Normal file
@@ -0,0 +1,131 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
||||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||
'info_dict': {
|
||||
'id': '34934644',
|
||||
'ext': 'mp4',
|
||||
'title': 'Quad Cold',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'MowgliSB',
|
||||
'timestamp': 1603165266,
|
||||
'upload_date': '20201020',
|
||||
'uploader_id': 10619174,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/clips/36787208',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '36787208',
|
||||
'ext': 'mp4',
|
||||
'title': 'u tk me i tk u bigger',
|
||||
'description': 'Medal,https://medal.tv/desktop/',
|
||||
'uploader': 'Mimicc',
|
||||
'timestamp': 1605580939,
|
||||
'upload_date': '20201117',
|
||||
'uploader_id': 5156321,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
hydration_data = self._parse_json(self._search_regex(
|
||||
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
|
||||
webpage, 'hydration data', default='{}'), video_id)
|
||||
|
||||
clip = try_get(
|
||||
hydration_data, lambda x: x['clips'][video_id], dict) or {}
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
'Could not find video information.', video_id=video_id)
|
||||
|
||||
title = clip['contentTitle']
|
||||
|
||||
source_width = int_or_none(clip.get('sourceWidth'))
|
||||
source_height = int_or_none(clip.get('sourceHeight'))
|
||||
|
||||
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
|
||||
|
||||
def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
||||
item_id = item_id or '%dp' % height
|
||||
if item_id not in item_url:
|
||||
return
|
||||
width = int(round(aspect_ratio * height))
|
||||
container.append({
|
||||
'url': item_url,
|
||||
id_key: item_id,
|
||||
'width': width,
|
||||
'height': height
|
||||
})
|
||||
|
||||
formats = []
|
||||
thumbnails = []
|
||||
for k, v in clip.items():
|
||||
if not (v and isinstance(v, compat_str)):
|
||||
continue
|
||||
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
|
||||
if not mobj:
|
||||
continue
|
||||
prefix = mobj.group(1)
|
||||
height = int_or_none(mobj.group(2))
|
||||
if prefix == 'contentUrl':
|
||||
add_item(
|
||||
formats, v, height or source_height,
|
||||
item_id=None if height else 'source')
|
||||
elif prefix == 'thumbnail':
|
||||
add_item(thumbnails, v, height, 'id')
|
||||
|
||||
error = clip.get('error')
|
||||
if not formats and error:
|
||||
if error == 404:
|
||||
raise ExtractorError(
|
||||
'That clip does not exist.',
|
||||
expected=True, video_id=video_id)
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'An unknown error occurred ({0}).'.format(error),
|
||||
video_id=video_id)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Necessary because the id of the author is not known in advance.
|
||||
# Won't raise an issue if no profile can be found as this is optional.
|
||||
author = try_get(
|
||||
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
|
||||
author_id = str_or_none(author.get('id'))
|
||||
author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': clip.get('contentDescription'),
|
||||
'uploader': author.get('displayName'),
|
||||
'timestamp': float_or_none(clip.get('created'), 1000),
|
||||
'uploader_id': author_id,
|
||||
'uploader_url': author_url,
|
||||
'duration': int_or_none(clip.get('videoLengthSeconds')),
|
||||
'view_count': int_or_none(clip.get('views')),
|
||||
'like_count': int_or_none(clip.get('likes')),
|
||||
'comment_count': int_or_none(clip.get('comments')),
|
||||
}
|
@@ -6,7 +6,6 @@ import re
|
||||
from .theplatform import ThePlatformBaseIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -114,7 +113,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
continue
|
||||
urlh = ie._request_webpage(
|
||||
embed_url, video_id, note='Following embed URL redirect')
|
||||
embed_url = compat_str(urlh.geturl())
|
||||
embed_url = urlh.geturl()
|
||||
program_guid = _program_guid(_qs(embed_url))
|
||||
if program_guid:
|
||||
entries.append(embed_url)
|
||||
|
@@ -129,7 +129,7 @@ class MediasiteIE(InfoExtractor):
|
||||
query = mobj.group('query')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
|
||||
redirect_url = compat_str(urlh.geturl())
|
||||
redirect_url = urlh.geturl()
|
||||
|
||||
# XXX: might have also extracted UrlReferrer and QueryString from the html
|
||||
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
|
||||
|
@@ -17,9 +17,8 @@ from ..utils import (
|
||||
|
||||
|
||||
class MGTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||
IE_DESC = '芒果TV'
|
||||
_GEO_COUNTRIES = ['CN']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||
@@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://w.mgtv.com/b/301817/3826653.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
|
||||
try:
|
||||
api_data = self._download_json(
|
||||
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
||||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
|
||||
'tk2': tk2,
|
||||
'video_id': video_id,
|
||||
}, headers=self.geo_verification_headers())['data']
|
||||
except ExtractorError as e:
|
||||
@@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
|
||||
stream_data = self._download_json(
|
||||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
||||
'pm2': api_data['atc']['pm2'],
|
||||
'tk2': tk2,
|
||||
'video_id': video_id,
|
||||
}, headers=self.geo_verification_headers())['data']
|
||||
stream_domain = stream_data['stream_domain'][0]
|
||||
|
@@ -1,5 +1,8 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
@@ -54,3 +57,23 @@ class MofosexIE(KeezMoviesIE):
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
|
||||
class MofosexEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
|
||||
ie=MofosexIE.ie_key(), video_id=video_id)
|
||||
|
@@ -26,7 +26,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
|
||||
'upload_date': '20100913',
|
||||
'uploader_id': 'famouslyfuckedup',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@@ -40,7 +40,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'game', 'hairy'],
|
||||
'upload_date': '20140622',
|
||||
'uploader_id': 'Sulivana7x',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': '404',
|
||||
@@ -54,7 +54,7 @@ class MotherlessIE(InfoExtractor):
|
||||
'categories': ['superheroine heroine superher'],
|
||||
'upload_date': '20140827',
|
||||
'uploader_id': 'shade0230',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
@@ -76,7 +76,8 @@ class MotherlessIE(InfoExtractor):
|
||||
raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
|
||||
(r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
|
||||
r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
|
||||
video_url = (self._html_search_regex(
|
||||
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
|
||||
@@ -84,14 +85,15 @@ class MotherlessIE(InfoExtractor):
|
||||
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||
age_limit = self._rta_search(webpage)
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Views</strong>\s+([^<]+)<',
|
||||
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._html_search_regex(
|
||||
r'<strong>Favorited</strong>\s+([^<]+)<',
|
||||
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||
webpage, 'like count', fatal=False))
|
||||
|
||||
upload_date = self._html_search_regex(
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
|
||||
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
||||
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
||||
if 'Ago' in upload_date:
|
||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
||||
|
@@ -349,6 +349,18 @@ class MTVIE(MTVServicesInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def extract_child_with_type(parent, t):
|
||||
children = parent['children']
|
||||
return next(c for c in children if c.get('type') == t)
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||
main_container = self.extract_child_with_type(data, 'MainContainer')
|
||||
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
|
||||
return video_player['props']['media']['video']['config']['uri']
|
||||
|
||||
|
||||
class MTVJapanIE(MTVServicesInfoExtractor):
|
||||
IE_NAME = 'mtvjapan'
|
||||
|
@@ -1,68 +1,33 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class NaverIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
|
||||
class NaverBaseIE(InfoExtractor):
|
||||
_CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.naver.com/v/81652',
|
||||
'info_dict': {
|
||||
'id': '81652',
|
||||
'ext': 'mp4',
|
||||
'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
|
||||
'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||
'upload_date': '20130903',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '638ed4c12012c458fefcddfd01f173cd',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
'ext': 'mp4',
|
||||
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||
'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
|
||||
'upload_date': '20150519',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
vid = self._search_regex(
|
||||
r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'video id', fatal=None, group='value')
|
||||
in_key = self._search_regex(
|
||||
r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'key', default=None, group='value')
|
||||
|
||||
if not vid or not in_key:
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
|
||||
webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
def _extract_video_info(self, video_id, vid, key):
|
||||
video_data = self._download_json(
|
||||
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
|
||||
video_id, query={
|
||||
'key': in_key,
|
||||
'key': key,
|
||||
})
|
||||
meta = video_data['meta']
|
||||
title = meta['subject']
|
||||
formats = []
|
||||
get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []
|
||||
|
||||
def extract_formats(streams, stream_type, query={}):
|
||||
for stream in streams:
|
||||
@@ -73,7 +38,7 @@ class NaverIE(InfoExtractor):
|
||||
encoding_option = stream.get('encodingOption', {})
|
||||
bitrate = stream.get('bitrate', {})
|
||||
formats.append({
|
||||
'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')),
|
||||
'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
|
||||
'url': stream_url,
|
||||
'width': int_or_none(encoding_option.get('width')),
|
||||
'height': int_or_none(encoding_option.get('height')),
|
||||
@@ -83,7 +48,7 @@ class NaverIE(InfoExtractor):
|
||||
'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
|
||||
})
|
||||
|
||||
extract_formats(video_data.get('videos', {}).get('list', []), 'H264')
|
||||
extract_formats(get_list('video'), 'H264')
|
||||
for stream_set in video_data.get('streams', []):
|
||||
query = {}
|
||||
for param in stream_set.get('keys', []):
|
||||
@@ -101,28 +66,101 @@ class NaverIE(InfoExtractor):
|
||||
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
|
||||
|
||||
def get_subs(caption_url):
|
||||
if re.search(self._CAPTION_EXT_RE, caption_url):
|
||||
return [{
|
||||
'url': replace_ext(caption_url, 'ttml'),
|
||||
}, {
|
||||
'url': replace_ext(caption_url, 'vtt'),
|
||||
}]
|
||||
else:
|
||||
return [{'url': caption_url}]
|
||||
|
||||
automatic_captions = {}
|
||||
subtitles = {}
|
||||
for caption in video_data.get('captions', {}).get('list', []):
|
||||
for caption in get_list('caption'):
|
||||
caption_url = caption.get('source')
|
||||
if not caption_url:
|
||||
continue
|
||||
subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({
|
||||
'url': caption_url,
|
||||
})
|
||||
sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
|
||||
sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url))
|
||||
|
||||
upload_date = self._search_regex(
|
||||
r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})',
|
||||
webpage, 'upload date', fatal=False)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('.', '')
|
||||
user = meta.get('user', {})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage),
|
||||
'automatic_captions': automatic_captions,
|
||||
'thumbnail': try_get(meta, lambda x: x['cover']['source']),
|
||||
'view_count': int_or_none(meta.get('count')),
|
||||
'upload_date': upload_date,
|
||||
'uploader_id': user.get('id'),
|
||||
'uploader': user.get('name'),
|
||||
'uploader_url': user.get('url'),
|
||||
}
|
||||
|
||||
|
||||
class NaverIE(NaverBaseIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
|
||||
_GEO_BYPASS = False
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.naver.com/v/81652',
|
||||
'info_dict': {
|
||||
'id': '81652',
|
||||
'ext': 'mp4',
|
||||
'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
|
||||
'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
|
||||
'timestamp': 1378200754,
|
||||
'upload_date': '20130903',
|
||||
'uploader': '메가스터디, 합격불변의 법칙',
|
||||
'uploader_id': 'megastudy',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '8a38e35354d26a17f73f4e90094febd3',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
'ext': 'mp4',
|
||||
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||
'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
|
||||
'timestamp': 1432030253,
|
||||
'upload_date': '20150519',
|
||||
'uploader': '4가지쇼 시즌2',
|
||||
'uploader_id': 'wrappinguser29',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://tv.naver.com/api/json/v/' + video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
player_info_json = content.get('playerInfoJson') or {}
|
||||
current_clip = player_info_json.get('currentClip') or {}
|
||||
|
||||
vid = current_clip.get('videoId')
|
||||
in_key = current_clip.get('inKey')
|
||||
|
||||
if not vid or not in_key:
|
||||
player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
|
||||
if player_auth == 'notCountry':
|
||||
self.raise_geo_restricted(countries=['KR'])
|
||||
elif player_auth == 'notLogin':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
info = self._extract_video_info(video_id, vid, in_key)
|
||||
info.update({
|
||||
'description': clean_html(current_clip.get('description')),
|
||||
'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
|
||||
'duration': parse_duration(current_clip.get('displayPlayTime')),
|
||||
'like_count': int_or_none(current_clip.get('recommendPoint')),
|
||||
'age_limit': 19 if current_clip.get('adult') else None,
|
||||
})
|
||||
return info
|
||||
|
@@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
@@ -87,11 +86,25 @@ class NBCIE(AdobePassIE):
|
||||
def _real_extract(self, url):
|
||||
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
||||
permalink = 'http' + compat_urllib_parse_unquote(permalink)
|
||||
response = self._download_json(
|
||||
video_data = self._download_json(
|
||||
'https://friendship.nbc.co/v2/graphql', video_id, query={
|
||||
'query': '''{
|
||||
page(name: "%s", platform: web, type: VIDEO, userId: "0") {
|
||||
data {
|
||||
'query': '''query bonanzaPage(
|
||||
$app: NBCUBrands! = nbc
|
||||
$name: String!
|
||||
$oneApp: Boolean
|
||||
$platform: SupportedPlatforms! = web
|
||||
$type: EntityPageType! = VIDEO
|
||||
$userId: String!
|
||||
) {
|
||||
bonanzaPage(
|
||||
app: $app
|
||||
name: $name
|
||||
oneApp: $oneApp
|
||||
platform: $platform
|
||||
type: $type
|
||||
userId: $userId
|
||||
) {
|
||||
metadata {
|
||||
... on VideoPageData {
|
||||
description
|
||||
episodeNumber
|
||||
@@ -100,15 +113,20 @@ class NBCIE(AdobePassIE):
|
||||
mpxAccountId
|
||||
mpxGuid
|
||||
rating
|
||||
resourceId
|
||||
seasonNumber
|
||||
secondaryTitle
|
||||
seriesShortTitle
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % permalink,
|
||||
})
|
||||
video_data = response['data']['page']['data']
|
||||
}''',
|
||||
'variables': json.dumps({
|
||||
'name': permalink,
|
||||
'oneApp': True,
|
||||
'userId': '0',
|
||||
}),
|
||||
})['data']['bonanzaPage']['metadata']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
@@ -117,8 +135,8 @@ class NBCIE(AdobePassIE):
|
||||
title = video_data['secondaryTitle']
|
||||
if video_data.get('locked'):
|
||||
resource = self._get_mvpd_resource(
|
||||
'nbcentertainment', title, video_id,
|
||||
video_data.get('rating'))
|
||||
video_data.get('resourceId') or 'nbcentertainment',
|
||||
title, video_id, video_data.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'nbcentertainment', resource)
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
@@ -375,8 +393,8 @@ class NBCNewsIE(ThePlatformIE):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'window\.__data\s*=\s*({.+});', webpage,
|
||||
'bootstrap json'), video_id, js_to_json)
|
||||
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||
webpage, 'bootstrap json'), video_id)['props']['initialState']
|
||||
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
||||
if not video_data:
|
||||
video_data = data['article']['content'][0]['primaryMedia']['video']
|
||||
|
@@ -7,8 +7,11 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@@ -78,6 +81,29 @@ class NDRIE(NDRBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# with subtitles
|
||||
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
|
||||
'info_dict': {
|
||||
'id': 'extra18674',
|
||||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||
'ext': 'mp4',
|
||||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||
'uploader': 'ndrtv',
|
||||
'upload_date': '20201113',
|
||||
'duration': 1749,
|
||||
'subtitles': {
|
||||
'de': [{
|
||||
'ext': 'ttml',
|
||||
'url': r're:^https://www\.ndr\.de.+',
|
||||
}],
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}, {
|
||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||
'only_matching': True,
|
||||
@@ -85,21 +111,25 @@ class NDRIE(NDRBaseIE):
|
||||
|
||||
def _extract_embed(self, webpage, display_id):
|
||||
embed_url = self._html_search_meta(
|
||||
'embedURL', webpage, 'embed URL', fatal=True)
|
||||
'embedURL', webpage, 'embed URL',
|
||||
default=None) or self._search_regex(
|
||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'embed URL', group='url')
|
||||
description = self._search_regex(
|
||||
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(
|
||||
self._search_regex(
|
||||
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
|
||||
webpage, 'upload date', fatal=False))
|
||||
return {
|
||||
webpage, 'upload date', default=None))
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
return merge_dicts({
|
||||
'_type': 'url_transparent',
|
||||
'url': embed_url,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
}, info)
|
||||
|
||||
|
||||
class NJoyIE(NDRBaseIE):
|
||||
@@ -220,11 +250,31 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
upload_date = ppjson.get('config', {}).get('publicationDate')
|
||||
duration = int_or_none(config.get('duration'))
|
||||
|
||||
thumbnails = [{
|
||||
'id': thumbnail.get('quality') or thumbnail_id,
|
||||
'url': thumbnail['src'],
|
||||
'preference': quality_key(thumbnail.get('quality')),
|
||||
} for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')]
|
||||
thumbnails = []
|
||||
poster = try_get(config, lambda x: x['poster'], dict) or {}
|
||||
for thumbnail_id, thumbnail in poster.items():
|
||||
thumbnail_url = urljoin(url, thumbnail.get('src'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': thumbnail.get('quality') or thumbnail_id,
|
||||
'url': thumbnail_url,
|
||||
'preference': quality_key(thumbnail.get('quality')),
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
tracks = config.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
track_url = urljoin(url, track.get('src'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('srclang') or 'de', []).append({
|
||||
'url': track_url,
|
||||
'ext': 'ttml',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -235,6 +285,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class NhkVodIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[a-z]+-\d{8}-\d+)'
|
||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
|
||||
# Content available only for a limited period of time. Visit
|
||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||
_TESTS = [{
|
||||
@@ -30,8 +30,11 @@ class NhkVodIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
|
||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
|
||||
@@ -82,15 +85,9 @@ class NhkVodIE(InfoExtractor):
|
||||
audio = episode['audio']
|
||||
audio_path = audio['audio']
|
||||
info['formats'] = self._extract_m3u8_formats(
|
||||
'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
|
||||
episode_id, 'm4a', m3u8_id='hls', fatal=False)
|
||||
for proto in ('rtmpt', 'rtmp'):
|
||||
info['formats'].append({
|
||||
'ext': 'flv',
|
||||
'format_id': proto,
|
||||
'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
|
||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in info['formats']:
|
||||
f['language'] = lang
|
||||
return info
|
||||
|
@@ -6,6 +6,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
@@ -18,7 +19,7 @@ class NovaEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
|
||||
'md5': 'b3834f6de5401baabf31ed57456463f7',
|
||||
'md5': 'ee009bafcc794541570edd44b71cbea3',
|
||||
'info_dict': {
|
||||
'id': '8o0n0r',
|
||||
'ext': 'mp4',
|
||||
@@ -33,36 +34,76 @@ class NovaEmbedIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
duration = None
|
||||
formats = []
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
|
||||
webpage, 'player', default='{}'), video_id, fatal=False)
|
||||
if player:
|
||||
for format_id, format_list in player['tracks'].items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_dict in format_list:
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = url_or_none(format_dict.get('src'))
|
||||
format_type = format_dict.get('type')
|
||||
ext = determine_ext(format_url)
|
||||
if (format_type == 'application/x-mpegURL'
|
||||
or format_id == 'HLS' or ext == 'm3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
elif (format_type == 'application/dash+xml'
|
||||
or format_id == 'DASH' or ext == 'mpd'):
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
duration = int_or_none(player.get('duration'))
|
||||
else:
|
||||
# Old path, not actual as of 08.04.2020
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
format_list = [format_list]
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
if format_id == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
continue
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(
|
||||
@@ -75,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
|
||||
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='value')
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
|
||||
default=duration))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -91,7 +133,7 @@ class NovaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
|
||||
'md5': '249baab7d0104e186e78b0899c7d5f28',
|
||||
'info_dict': {
|
||||
'id': '1757139',
|
||||
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
||||
@@ -113,7 +155,8 @@ class NovaIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'gone',
|
||||
}, {
|
||||
# media.cms.nova.cz embed
|
||||
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
|
||||
@@ -128,6 +171,7 @@ class NovaIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [NovaEmbedIE.ie_key()],
|
||||
'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
|
||||
}, {
|
||||
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
||||
'only_matching': True,
|
||||
@@ -152,14 +196,29 @@ class NovaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
description = clean_html(self._og_search_description(webpage, default=None))
|
||||
if site == 'novaplus':
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
|
||||
elif site == 'fanda':
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
|
||||
else:
|
||||
upload_date = None
|
||||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return self.url_result(
|
||||
'https://media.cms.nova.cz/embed/%s' % embed_id,
|
||||
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
|
||||
'ie_key': NovaEmbedIE.ie_key(),
|
||||
'id': embed_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date
|
||||
}
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
||||
@@ -233,18 +292,8 @@ class NovaIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
|
||||
description = clean_html(self._og_search_description(webpage, default=None))
|
||||
thumbnail = config.get('poster')
|
||||
|
||||
if site == 'novaplus':
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
|
||||
elif site == 'fanda':
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
|
||||
else:
|
||||
upload_date = None
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@@ -4,6 +4,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -32,7 +33,7 @@ class NprIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# mutlimedia, not media title
|
||||
# multimedia, not media title
|
||||
'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
|
||||
'info_dict': {
|
||||
'id': '533198237',
|
||||
@@ -48,6 +49,10 @@ class NprIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
# multimedia, no formats, stream
|
||||
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -95,6 +100,17 @@ class NprIE(InfoExtractor):
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
for stream_id, stream_entry in media.get('stream', {}).items():
|
||||
if not isinstance(stream_entry, dict):
|
||||
continue
|
||||
if stream_id != 'hlsUrl':
|
||||
continue
|
||||
stream_url = url_or_none(stream_entry.get('$text'))
|
||||
if not stream_url:
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url, stream_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
entries.append({
|
||||
|
@@ -9,191 +9,21 @@ from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
JSON_LD_RE,
|
||||
js_to_json,
|
||||
NO_DEFAULT,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NRKBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['NO']
|
||||
|
||||
_api_host = None
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||
|
||||
for api_host in api_hosts:
|
||||
data = self._download_json(
|
||||
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||
video_id, 'Downloading mediaelement JSON',
|
||||
fatal=api_host == api_hosts[-1])
|
||||
if not data:
|
||||
continue
|
||||
self._api_host = api_host
|
||||
break
|
||||
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
entries = []
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
live = (data.get('mediaElementType') == 'Live'
|
||||
or data.get('isLive') is True or conviva.get('isLive'))
|
||||
|
||||
def make_title(t):
|
||||
return self._live_title(t) if live else t
|
||||
|
||||
media_assets = data.get('mediaAssets')
|
||||
if media_assets and isinstance(media_assets, list):
|
||||
def video_id_and_title(idx):
|
||||
return ((video_id, title) if len(media_assets) == 1
|
||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||
for num, asset in enumerate(media_assets, 1):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
formats = self._extract_akamai_formats(asset_url, video_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Some f4m streams may not work with hdcore in fragments' URLs
|
||||
for f in formats:
|
||||
extra_param = f.get('extra_param_to_segment_url')
|
||||
if extra_param and 'hdcore' in extra_param:
|
||||
del f['extra_param_to_segment_url']
|
||||
|
||||
entry_id, entry_title = video_id_and_title(num)
|
||||
duration = parse_duration(asset.get('duration'))
|
||||
subtitles = {}
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({
|
||||
'url': compat_urllib_parse_unquote(subtitle_url)
|
||||
})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': make_title(entry_title),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
media_url = data.get('mediaUrl')
|
||||
if media_url:
|
||||
formats = self._extract_akamai_formats(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': make_title(title),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
MESSAGES = {
|
||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
}
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type:
|
||||
self.raise_geo_restricted(
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
||||
message_type, message_type)),
|
||||
expected=True)
|
||||
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
web_images = images.get('webImages')
|
||||
if isinstance(web_images, list):
|
||||
thumbnails = [{
|
||||
'url': image['imageUrl'],
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
|
||||
|
||||
for entry in entries:
|
||||
entry.update(common_info)
|
||||
for f in entry['formats']:
|
||||
f['vcodec'] = vcodec
|
||||
|
||||
points = data.get('shortIndexPoints')
|
||||
if isinstance(points, list):
|
||||
chapters = []
|
||||
for next_num, point in enumerate(points, start=1):
|
||||
if not isinstance(point, dict):
|
||||
continue
|
||||
start_time = parse_duration(point.get('startPoint'))
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = parse_duration(
|
||||
data.get('duration')
|
||||
if next_num == len(points)
|
||||
else points[next_num].get('startPoint'))
|
||||
if end_time is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': point.get('title'),
|
||||
})
|
||||
if chapters and len(entries) == 1:
|
||||
entries[0]['chapters'] = chapters
|
||||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
|
||||
class NRKIE(NRKBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
@@ -201,13 +31,13 @@ class NRKIE(NRKBaseIE):
|
||||
nrk:|
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?nrk\.no/video/PS\*|
|
||||
(?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
|
||||
v8[-.]psapi\.nrk\.no/mediaelement/
|
||||
)
|
||||
)
|
||||
(?P<id>[^?#&]+)
|
||||
(?P<id>[^?\#&]+)
|
||||
'''
|
||||
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
|
||||
|
||||
_TESTS = [{
|
||||
# video
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
@@ -239,8 +69,76 @@ class NRKIE(NRKBaseIE):
|
||||
}, {
|
||||
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_from_playback(self, video_id):
|
||||
manifest = self._download_json(
|
||||
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
|
||||
video_id, 'Downloading manifest JSON')
|
||||
|
||||
playable = manifest['playable']
|
||||
|
||||
formats = []
|
||||
for asset in playable['assets']:
|
||||
if not isinstance(asset, dict):
|
||||
continue
|
||||
if asset.get('encrypted'):
|
||||
continue
|
||||
format_url = url_or_none(asset.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
data = self._download_json(
|
||||
'http://psapi.nrk.no/playback/metadata/%s' % video_id,
|
||||
video_id, 'Downloading metadata JSON')
|
||||
|
||||
preplay = data['preplay']
|
||||
titles = preplay['titles']
|
||||
title = titles['title']
|
||||
alt_title = titles.get('subtitle')
|
||||
|
||||
description = preplay.get('description')
|
||||
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
|
||||
|
||||
thumbnails = []
|
||||
for image in try_get(
|
||||
preplay, lambda x: x['poster']['images'], list) or []:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('pixelWidth')),
|
||||
'height': int_or_none(image.get('pixelHeight')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'alt_title': alt_title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_playback(video_id)
|
||||
|
||||
|
||||
class NRKTVIE(NRKBaseIE):
|
||||
IE_DESC = 'NRK TV and NRK Radio'
|
||||
@@ -255,6 +153,17 @@ class NRKTVIE(NRKBaseIE):
|
||||
''' % _EPISODE_RE
|
||||
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/program/MDDP12000117',
|
||||
'md5': '8270824df46ec629b66aeaa5796b36fb',
|
||||
'info_dict': {
|
||||
'id': 'MDDP12000117AA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Alarm Trolltunga',
|
||||
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
|
||||
'duration': 2223,
|
||||
'age_limit': 6,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
|
||||
'md5': '9a167e54d04671eb6317a37b7bc8a280',
|
||||
'info_dict': {
|
||||
@@ -266,6 +175,7 @@ class NRKTVIE(NRKBaseIE):
|
||||
'series': '20 spørsmål',
|
||||
'episode': '23.05.2014',
|
||||
},
|
||||
'skip': 'NoProgramRights',
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/program/mdfp15000514',
|
||||
'info_dict': {
|
||||
@@ -367,10 +277,202 @@ class NRKTVIE(NRKBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_api_host = None
|
||||
|
||||
def _extract_from_mediaelement(self, video_id):
|
||||
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
|
||||
|
||||
for api_host in api_hosts:
|
||||
data = self._download_json(
|
||||
'http://%s/mediaelement/%s' % (api_host, video_id),
|
||||
video_id, 'Downloading mediaelement JSON',
|
||||
fatal=api_host == api_hosts[-1])
|
||||
if not data:
|
||||
continue
|
||||
self._api_host = api_host
|
||||
break
|
||||
|
||||
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
|
||||
video_id = data.get('id') or video_id
|
||||
|
||||
entries = []
|
||||
|
||||
conviva = data.get('convivaStatistics') or {}
|
||||
live = (data.get('mediaElementType') == 'Live'
|
||||
or data.get('isLive') is True or conviva.get('isLive'))
|
||||
|
||||
def make_title(t):
|
||||
return self._live_title(t) if live else t
|
||||
|
||||
media_assets = data.get('mediaAssets')
|
||||
if media_assets and isinstance(media_assets, list):
|
||||
def video_id_and_title(idx):
|
||||
return ((video_id, title) if len(media_assets) == 1
|
||||
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
|
||||
for num, asset in enumerate(media_assets, 1):
|
||||
asset_url = asset.get('url')
|
||||
if not asset_url:
|
||||
continue
|
||||
formats = self._extract_akamai_formats(asset_url, video_id)
|
||||
if not formats:
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
# Some f4m streams may not work with hdcore in fragments' URLs
|
||||
for f in formats:
|
||||
extra_param = f.get('extra_param_to_segment_url')
|
||||
if extra_param and 'hdcore' in extra_param:
|
||||
del f['extra_param_to_segment_url']
|
||||
|
||||
entry_id, entry_title = video_id_and_title(num)
|
||||
duration = parse_duration(asset.get('duration'))
|
||||
subtitles = {}
|
||||
for subtitle in ('webVtt', 'timedText'):
|
||||
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
|
||||
if subtitle_url:
|
||||
subtitles.setdefault('no', []).append({
|
||||
'url': compat_urllib_parse_unquote(subtitle_url)
|
||||
})
|
||||
entries.append({
|
||||
'id': asset.get('carrierId') or entry_id,
|
||||
'title': make_title(entry_title),
|
||||
'duration': duration,
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
if not entries:
|
||||
media_url = data.get('mediaUrl')
|
||||
if media_url:
|
||||
formats = self._extract_akamai_formats(media_url, video_id)
|
||||
self._sort_formats(formats)
|
||||
duration = parse_duration(data.get('duration'))
|
||||
entries = [{
|
||||
'id': video_id,
|
||||
'title': make_title(title),
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}]
|
||||
|
||||
if not entries:
|
||||
MESSAGES = {
|
||||
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
|
||||
'ProgramRightsHasExpired': 'Programmet har gått ut',
|
||||
'NoProgramRights': 'Ikke tilgjengelig',
|
||||
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
|
||||
}
|
||||
message_type = data.get('messageType', '')
|
||||
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
|
||||
if 'IsGeoBlocked' in message_type:
|
||||
self.raise_geo_restricted(
|
||||
msg=MESSAGES.get('ProgramIsGeoBlocked'),
|
||||
countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
|
||||
message_type, message_type)),
|
||||
expected=True)
|
||||
|
||||
series = conviva.get('seriesName') or data.get('seriesTitle')
|
||||
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
|
||||
|
||||
season_number = None
|
||||
episode_number = None
|
||||
if data.get('mediaElementType') == 'Episode':
|
||||
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
|
||||
data.get('relativeOriginUrl', '')
|
||||
EPISODENUM_RE = [
|
||||
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
|
||||
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
|
||||
]
|
||||
season_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'season number',
|
||||
default=None, group='season'))
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
EPISODENUM_RE, _season_episode, 'episode number',
|
||||
default=None, group='episode'))
|
||||
|
||||
thumbnails = None
|
||||
images = data.get('images')
|
||||
if images and isinstance(images, dict):
|
||||
web_images = images.get('webImages')
|
||||
if isinstance(web_images, list):
|
||||
thumbnails = [{
|
||||
'url': image['imageUrl'],
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in web_images if image.get('imageUrl')]
|
||||
|
||||
description = data.get('description')
|
||||
category = data.get('mediaAnalytics', {}).get('category')
|
||||
|
||||
common_info = {
|
||||
'description': description,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'categories': [category] if category else None,
|
||||
'age_limit': parse_age_limit(data.get('legalAge')),
|
||||
'thumbnails': thumbnails,
|
||||
}
|
||||
|
||||
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
|
||||
|
||||
for entry in entries:
|
||||
entry.update(common_info)
|
||||
for f in entry['formats']:
|
||||
f['vcodec'] = vcodec
|
||||
|
||||
points = data.get('shortIndexPoints')
|
||||
if isinstance(points, list):
|
||||
chapters = []
|
||||
for next_num, point in enumerate(points, start=1):
|
||||
if not isinstance(point, dict):
|
||||
continue
|
||||
start_time = parse_duration(point.get('startPoint'))
|
||||
if start_time is None:
|
||||
continue
|
||||
end_time = parse_duration(
|
||||
data.get('duration')
|
||||
if next_num == len(points)
|
||||
else points[next_num].get('startPoint'))
|
||||
if end_time is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
'title': point.get('title'),
|
||||
})
|
||||
if chapters and len(entries) == 1:
|
||||
entries[0]['chapters'] = chapters
|
||||
|
||||
return self.playlist_result(entries, video_id, title, description)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self._extract_from_mediaelement(video_id)
|
||||
|
||||
|
||||
class NRKTVEpisodeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
|
||||
'info_dict': {
|
||||
'id': 'MUHH36005220BA',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kro, krig og kjærlighet 2:6',
|
||||
'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
|
||||
'duration': 1563,
|
||||
'series': 'Hellums kro',
|
||||
'season_number': 1,
|
||||
'episode_number': 2,
|
||||
'episode': '2:6',
|
||||
'age_limit': 6,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
|
||||
'info_dict': {
|
||||
'id': 'MSUI14000816AA',
|
||||
@@ -386,20 +488,28 @@ class NRKTVEpisodeIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
'skip': 'ProgramRightsHasExpired',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nrk_id = self._parse_json(
|
||||
self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
|
||||
display_id)['@id']
|
||||
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
nrk_id = info.get('@id') or self._html_search_meta(
|
||||
'nrk:program-id', webpage, default=None) or self._search_regex(
|
||||
r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage,
|
||||
'nrk id')
|
||||
assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
|
||||
return self.url_result(
|
||||
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': nrk_id,
|
||||
'url': 'nrk:%s' % nrk_id,
|
||||
'ie_key': NRKIE.ie_key(),
|
||||
})
|
||||
return info
|
||||
|
||||
|
||||
class NRKTVSerieBaseIE(InfoExtractor):
|
||||
@@ -409,7 +519,7 @@ class NRKTVSerieBaseIE(InfoExtractor):
|
||||
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
|
||||
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
|
||||
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
|
||||
display_id, fatal=False)
|
||||
display_id, fatal=False, transform_source=js_to_json)
|
||||
if not config:
|
||||
return
|
||||
return try_get(
|
||||
@@ -479,6 +589,14 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
|
||||
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
|
||||
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.nrk.no/serie/blank',
|
||||
'info_dict': {
|
||||
'id': 'blank',
|
||||
'title': 'Blank',
|
||||
'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
}, {
|
||||
# new layout, seasons
|
||||
'url': 'https://tv.nrk.no/serie/backstage',
|
||||
'info_dict': {
|
||||
@@ -648,7 +766,7 @@ class NRKSkoleIE(InfoExtractor):
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
|
||||
'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6',
|
||||
'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
|
||||
'info_dict': {
|
||||
'id': '6021',
|
||||
'ext': 'mp4',
|
||||
|
@@ -69,10 +69,10 @@ class NYTimesBaseIE(InfoExtractor):
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
|
||||
'ext': ext,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
|
||||
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
@@ -221,3 +221,41 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||
webpage, 'podcast data')
|
||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||
|
||||
|
||||
class NYTimesCookingIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
|
||||
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
|
||||
'info_dict': {
|
||||
'id': '100000004756089',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1479383008,
|
||||
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
|
||||
'title': 'Cranberry Tart',
|
||||
'upload_date': '20161117',
|
||||
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'md5': '4b2e8c70530a89b8d905a2b572316eb8',
|
||||
'info_dict': {
|
||||
'id': '100000003951728',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1445509539,
|
||||
'description': 'Turkey guide',
|
||||
'upload_date': '20151022',
|
||||
'title': 'Turkey',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
|
@@ -6,12 +6,14 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
HEADRequest,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -88,8 +90,11 @@ class ORFTVthekIE(InfoExtractor):
|
||||
format_id = '-'.join(format_id_list)
|
||||
ext = determine_ext(src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
if any('/geoprotection' in f['url'] for f in m3u8_formats):
|
||||
self.raise_geo_restricted()
|
||||
formats.extend(m3u8_formats)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
src, video_id, f4m_id=format_id, fatal=False))
|
||||
@@ -157,48 +162,53 @@ class ORFTVthekIE(InfoExtractor):
|
||||
class ORFRadioIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
station = mobj.group('station')
|
||||
show_date = mobj.group('date')
|
||||
show_id = mobj.group('show')
|
||||
|
||||
if station == 'fm4':
|
||||
show_id = '4%s' % show_id
|
||||
|
||||
data = self._download_json(
|
||||
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),
|
||||
show_id
|
||||
)
|
||||
'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
|
||||
% (self._API_STATION, show_id, show_date), show_id)
|
||||
|
||||
def extract_entry_dict(info, title, subtitle):
|
||||
return {
|
||||
'id': info['loopStreamId'].replace('.mp3', ''),
|
||||
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),
|
||||
entries = []
|
||||
for info in data['streams']:
|
||||
loop_stream_id = str_or_none(info.get('loopStreamId'))
|
||||
if not loop_stream_id:
|
||||
continue
|
||||
title = str_or_none(data.get('title'))
|
||||
if not title:
|
||||
continue
|
||||
start = int_or_none(info.get('start'), scale=1000)
|
||||
end = int_or_none(info.get('end'), scale=1000)
|
||||
duration = end - start if end and start else None
|
||||
entries.append({
|
||||
'id': loop_stream_id.replace('.mp3', ''),
|
||||
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||
'title': title,
|
||||
'description': subtitle,
|
||||
'duration': (info['end'] - info['start']) / 1000,
|
||||
'timestamp': info['start'] / 1000,
|
||||
'description': clean_html(data.get('subtitle')),
|
||||
'duration': duration,
|
||||
'timestamp': start,
|
||||
'ext': 'mp3',
|
||||
'series': data.get('programTitle')
|
||||
}
|
||||
|
||||
entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
|
||||
'series': data.get('programTitle'),
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': show_id,
|
||||
'title': data['title'],
|
||||
'description': data['subtitle'],
|
||||
'entries': entries
|
||||
'title': data.get('title'),
|
||||
'description': clean_html(data.get('subtitle')),
|
||||
'entries': entries,
|
||||
}
|
||||
|
||||
|
||||
class ORFFM4IE(ORFRadioIE):
|
||||
IE_NAME = 'orf:fm4'
|
||||
IE_DESC = 'radio FM4'
|
||||
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
|
||||
_API_STATION = 'fm4'
|
||||
_LOOP_STATION = 'fm4'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://fm4.orf.at/player/20170107/CC',
|
||||
'url': 'http://fm4.orf.at/player/20170107/4CC',
|
||||
'md5': '2b0be47375432a7ef104453432a19212',
|
||||
'info_dict': {
|
||||
'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
|
||||
@@ -209,7 +219,138 @@ class ORFFM4IE(ORFRadioIE):
|
||||
'timestamp': 1483819257,
|
||||
'upload_date': '20170107',
|
||||
},
|
||||
'skip': 'Shows from ORF radios are only available for 7 days.'
|
||||
'skip': 'Shows from ORF radios are only available for 7 days.',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFNOEIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:noe'
|
||||
IE_DESC = 'Radio Niederösterreich'
|
||||
_VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'noe'
|
||||
_LOOP_STATION = 'oe2n'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://noe.orf.at/player/20200423/NGM',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFWIEIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:wien'
|
||||
IE_DESC = 'Radio Wien'
|
||||
_VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'wie'
|
||||
_LOOP_STATION = 'oe2w'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://wien.orf.at/player/20200423/WGUM',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFBGLIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:burgenland'
|
||||
IE_DESC = 'Radio Burgenland'
|
||||
_VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'bgl'
|
||||
_LOOP_STATION = 'oe2b'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://burgenland.orf.at/player/20200423/BGM',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFOOEIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:oberoesterreich'
|
||||
IE_DESC = 'Radio Oberösterreich'
|
||||
_VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'ooe'
|
||||
_LOOP_STATION = 'oe2o'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://ooe.orf.at/player/20200423/OGMO',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFSTMIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:steiermark'
|
||||
IE_DESC = 'Radio Steiermark'
|
||||
_VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'stm'
|
||||
_LOOP_STATION = 'oe2st'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://steiermark.orf.at/player/20200423/STGMS',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFKTNIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:kaernten'
|
||||
IE_DESC = 'Radio Kärnten'
|
||||
_VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'ktn'
|
||||
_LOOP_STATION = 'oe2k'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFSBGIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:salzburg'
|
||||
IE_DESC = 'Radio Salzburg'
|
||||
_VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'sbg'
|
||||
_LOOP_STATION = 'oe2s'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://salzburg.orf.at/player/20200423/SGUM',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFTIRIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:tirol'
|
||||
IE_DESC = 'Radio Tirol'
|
||||
_VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'tir'
|
||||
_LOOP_STATION = 'oe2t'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://tirol.orf.at/player/20200423/TGUMO',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFVBGIE(ORFRadioIE):
|
||||
IE_NAME = 'orf:vorarlberg'
|
||||
IE_DESC = 'Radio Vorarlberg'
|
||||
_VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'vbg'
|
||||
_LOOP_STATION = 'oe2v'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
class ORFOE3IE(ORFRadioIE):
|
||||
IE_NAME = 'orf:oe3'
|
||||
IE_DESC = 'Radio Österreich 3'
|
||||
_VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'oe3'
|
||||
_LOOP_STATION = 'oe3'
|
||||
|
||||
_TEST = {
|
||||
'url': 'https://oe3.orf.at/player/20200424/3WEK',
|
||||
'only_matching': True,
|
||||
}
|
||||
|
||||
|
||||
@@ -217,6 +358,8 @@ class ORFOE1IE(ORFRadioIE):
|
||||
IE_NAME = 'orf:oe1'
|
||||
IE_DESC = 'Radio Österreich 1'
|
||||
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
|
||||
_API_STATION = 'oe1'
|
||||
_LOOP_STATION = 'oe1'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://oe1.orf.at/player/20170108/456544',
|
||||
|
@@ -1,99 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class PandaTVIE(InfoExtractor):
|
||||
IE_DESC = '熊猫TV'
|
||||
_VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.panda.tv/66666',
|
||||
'info_dict': {
|
||||
'id': '66666',
|
||||
'title': 're:.+',
|
||||
'uploader': '刘杀鸡',
|
||||
'ext': 'flv',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Live stream is offline',
|
||||
}, {
|
||||
'url': 'https://www.panda.tv/66666',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
config = self._download_json(
|
||||
'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
|
||||
|
||||
error_code = config.get('errno', 0)
|
||||
if error_code != 0:
|
||||
raise ExtractorError(
|
||||
'%s returned error %s: %s'
|
||||
% (self.IE_NAME, error_code, config['errmsg']),
|
||||
expected=True)
|
||||
|
||||
data = config['data']
|
||||
video_info = data['videoinfo']
|
||||
|
||||
# 2 = live, 3 = offline
|
||||
if video_info.get('status') != '2':
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
|
||||
title = data['roominfo']['name']
|
||||
uploader = data.get('hostinfo', {}).get('name')
|
||||
room_key = video_info['room_key']
|
||||
stream_addr = video_info.get(
|
||||
'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
|
||||
|
||||
# Reverse engineered from web player swf
|
||||
# (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
|
||||
# writing).
|
||||
plflag0, plflag1 = video_info['plflag'].split('_')
|
||||
plflag0 = int(plflag0) - 1
|
||||
if plflag1 == '21':
|
||||
plflag0 = 10
|
||||
plflag1 = '4'
|
||||
live_panda = 'live_panda' if plflag0 < 1 else ''
|
||||
|
||||
plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
|
||||
sign = plflag_auth['auth']['sign']
|
||||
ts = plflag_auth['auth']['time']
|
||||
rid = plflag_auth['auth']['rid']
|
||||
|
||||
quality_key = qualities(['OD', 'HD', 'SD'])
|
||||
suffix = ['_small', '_mid', '']
|
||||
formats = []
|
||||
for k, v in stream_addr.items():
|
||||
if v != '1':
|
||||
continue
|
||||
quality = quality_key(k)
|
||||
if quality <= 0:
|
||||
continue
|
||||
for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
|
||||
formats.append({
|
||||
'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
|
||||
% (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
|
||||
'format_id': '%s-%s' % (k, ext),
|
||||
'quality': quality,
|
||||
'source_preference': pref,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._live_title(title),
|
||||
'uploader': uploader,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user