mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-16 13:18:36 +09:00
Compare commits
346 Commits
2019.05.11
...
2019.11.05
Author | SHA1 | Date | |
---|---|---|---|
![]() |
ea07412ebf | ||
![]() |
1a4e4b0bfe | ||
![]() |
20218040db | ||
![]() |
c69e71733d | ||
![]() |
3e49083604 | ||
![]() |
2349255abd | ||
![]() |
e452345fc5 | ||
![]() |
bf45295c53 | ||
![]() |
ef382405c6 | ||
![]() |
a6e6673e82 | ||
![]() |
564275e26f | ||
![]() |
726e8eef59 | ||
![]() |
e54924c46f | ||
![]() |
5e36b63486 | ||
![]() |
9249c50c18 | ||
![]() |
79b35e7c15 | ||
![]() |
836bfcb54e | ||
![]() |
4c95fcf9e8 | ||
![]() |
152f22920d | ||
![]() |
20cc7c082b | ||
![]() |
e987ce4bda | ||
![]() |
d439989215 | ||
![]() |
274bf5e4c5 | ||
![]() |
e993f1a095 | ||
![]() |
3cf70bf159 | ||
![]() |
237513e801 | ||
![]() |
8040a0d35e | ||
![]() |
45f4a43389 | ||
![]() |
9a621ddc3a | ||
![]() |
c56b2ac43c | ||
![]() |
8989349e6d | ||
![]() |
7455832f31 | ||
![]() |
c4bd9cb7bb | ||
![]() |
cae0bbc538 | ||
![]() |
53896ca5be | ||
![]() |
0d7392e68b | ||
![]() |
aef9f87ea4 | ||
![]() |
dd90a21c28 | ||
![]() |
01358b9fc1 | ||
![]() |
3cdcebf547 | ||
![]() |
cfabc50598 | ||
![]() |
0086726e86 | ||
![]() |
83e49259bf | ||
![]() |
895e5c03db | ||
![]() |
702984eca9 | ||
![]() |
b3c2fa6dad | ||
![]() |
831b732da1 | ||
![]() |
3e252cca0e | ||
![]() |
0f9d53566a | ||
![]() |
80c2126e80 | ||
![]() |
71fa0b04f9 | ||
![]() |
dd90451f0f | ||
![]() |
548c395716 | ||
![]() |
0b98f3a751 | ||
![]() |
235dbb434b | ||
![]() |
42cd0824b3 | ||
![]() |
3c989818e7 | ||
![]() |
416c3ca7f5 | ||
![]() |
162bcc68dc | ||
![]() |
07154c7930 | ||
![]() |
0c2d10d225 | ||
![]() |
820215f0e3 | ||
![]() |
b4818e3c7a | ||
![]() |
2297c0d7d9 | ||
![]() |
824fa51165 | ||
![]() |
34e3885bc9 | ||
![]() |
59296bae7e | ||
![]() |
755541a4c8 | ||
![]() |
86f63633c8 | ||
![]() |
0001157594 | ||
![]() |
bc48773ed4 | ||
![]() |
d07866f13e | ||
![]() |
2b115b9460 | ||
![]() |
e29e96a9f5 | ||
![]() |
6d394a66f5 | ||
![]() |
7815d6b743 | ||
![]() |
173190f5e3 | ||
![]() |
974311b5aa | ||
![]() |
30eb05cb41 | ||
![]() |
2af01c0293 | ||
![]() |
7e05df71b7 | ||
![]() |
a1ee23e98f | ||
![]() |
311ee45731 | ||
![]() |
c317b6163b | ||
![]() |
2765c47a8c | ||
![]() |
07b50f616e | ||
![]() |
1907f06e7b | ||
![]() |
d4bb825b83 | ||
![]() |
560d3b7d7c | ||
![]() |
4bf568d36c | ||
![]() |
05446d483d | ||
![]() |
3a37f2c3be | ||
![]() |
0b87beefe6 | ||
![]() |
fd4db1ebc2 | ||
![]() |
b64045cd2a | ||
![]() |
c2915de82e | ||
![]() |
4e72d02f39 | ||
![]() |
76e510b92c | ||
![]() |
9679a62a28 | ||
![]() |
ca20b13048 | ||
![]() |
894b3826f5 | ||
![]() |
aaf9d904aa | ||
![]() |
25e911a968 | ||
![]() |
74bc299453 | ||
![]() |
2906631e12 | ||
![]() |
326ae4ff96 | ||
![]() |
72fd4d0c6a | ||
![]() |
f4b865c613 | ||
![]() |
412f44f4b3 | ||
![]() |
6483fbd336 | ||
![]() |
8130ac42e5 | ||
![]() |
cb3e4a2947 | ||
![]() |
2a88a0c44d | ||
![]() |
33c1c7d80f | ||
![]() |
21d3c21e62 | ||
![]() |
a373befa25 | ||
![]() |
df63cafe49 | ||
![]() |
d06daf23da | ||
![]() |
8e9fdcbe27 | ||
![]() |
666d808e70 | ||
![]() |
7d327fea5b | ||
![]() |
4e3f1f0469 | ||
![]() |
4bc15a68d1 | ||
![]() |
edb2820ca5 | ||
![]() |
6cf6b357f5 | ||
![]() |
f455a934e9 | ||
![]() |
d9d3098675 | ||
![]() |
1cb812d3c2 | ||
![]() |
6fd26a7d4a | ||
![]() |
9cf26b6e1d | ||
![]() |
20e11b70ac | ||
![]() |
e1f692f0b3 | ||
![]() |
2f851a7d7d | ||
![]() |
4878759f3b | ||
![]() |
303d3e142c | ||
![]() |
bd10b229c0 | ||
![]() |
035c7a59e8 | ||
![]() |
bf1317d257 | ||
![]() |
bff90fc518 | ||
![]() |
31dbd054c8 | ||
![]() |
66d04c74e0 | ||
![]() |
d7da1e37c7 | ||
![]() |
f620d0d860 | ||
![]() |
79dd8884bb | ||
![]() |
df228355fd | ||
![]() |
8945b10f6e | ||
![]() |
7cb51b5daf | ||
![]() |
d78657fd18 | ||
![]() |
cc73d5ad15 | ||
![]() |
71f47617c8 | ||
![]() |
3f46a25a97 | ||
![]() |
9d058b3206 | ||
![]() |
b500955a58 | ||
![]() |
acc86c9a97 | ||
![]() |
b72305f078 | ||
![]() |
494d664e67 | ||
![]() |
d1fcf255c5 | ||
![]() |
183a18c4e7 | ||
![]() |
393cc31d5e | ||
![]() |
0add33abcb | ||
![]() |
0326bcb6c1 | ||
![]() |
def849e0e6 | ||
![]() |
69611a1616 | ||
![]() |
351f37c022 | ||
![]() |
3bce4ff7d9 | ||
![]() |
ffddb11264 | ||
![]() |
64b6a4e91e | ||
![]() |
b3d39be239 | ||
![]() |
1357734978 | ||
![]() |
eb9c9c74a6 | ||
![]() |
5efbc1366f | ||
![]() |
995f319b06 | ||
![]() |
d9d3a5a816 | ||
![]() |
4f2d735803 | ||
![]() |
2e9522b061 | ||
![]() |
be306d6a31 | ||
![]() |
33b529fabd | ||
![]() |
07f3a05c87 | ||
![]() |
535111657b | ||
![]() |
826dcff99c | ||
![]() |
9a37ff82f1 | ||
![]() |
766c4f6090 | ||
![]() |
7279163412 | ||
![]() |
07ab44c420 | ||
![]() |
2c8b1a21e8 | ||
![]() |
c2d125d99f | ||
![]() |
85c2c4b4ab | ||
![]() |
8614a03f9c | ||
![]() |
8dbf751aa2 | ||
![]() |
90634acfcf | ||
![]() |
eaba9dd6c2 | ||
![]() |
843ad1796b | ||
![]() |
608b8a4300 | ||
![]() |
ab794a553c | ||
![]() |
3b446ab351 | ||
![]() |
13a75688a5 | ||
![]() |
2e18adec98 | ||
![]() |
9c1da4a9f9 | ||
![]() |
5e1c39ac85 | ||
![]() |
1824bfdcdf | ||
![]() |
2f1991ff14 | ||
![]() |
8b4a0ebf10 | ||
![]() |
f61496863d | ||
![]() |
799756a3b3 | ||
![]() |
7d4dd3e5b4 | ||
![]() |
f2a213d025 | ||
![]() |
791d2e8117 | ||
![]() |
2adedc477e | ||
![]() |
898238e9f8 | ||
![]() |
ce80cacefd | ||
![]() |
0250161c52 | ||
![]() |
364a2cb658 | ||
![]() |
2fe074a960 | ||
![]() |
c452790a79 | ||
![]() |
d89a0a8026 | ||
![]() |
ba036333bf | ||
![]() |
b7ef93f0ab | ||
![]() |
f9eeeda31c | ||
![]() |
5f562bd4bb | ||
![]() |
b99f11a56b | ||
![]() |
4a71ef6da6 | ||
![]() |
fd95105ed4 | ||
![]() |
c72dc20d09 | ||
![]() |
272355c172 | ||
![]() |
57227618fe | ||
![]() |
0441d6266c | ||
![]() |
82f68e4a01 | ||
![]() |
d4ece5d359 | ||
![]() |
16d3672ad7 | ||
![]() |
0dd58a523f | ||
![]() |
27019dbb4b | ||
![]() |
baf67a604d | ||
![]() |
0d1f4af39d | ||
![]() |
7612406bf9 | ||
![]() |
4dcd4b7b16 | ||
![]() |
5fc0896168 | ||
![]() |
e4d53148f5 | ||
![]() |
cfe781d4fa | ||
![]() |
253289656f | ||
![]() |
4b30282616 | ||
![]() |
c9b0564ac1 | ||
![]() |
25d71fb058 | ||
![]() |
a6389abfd7 | ||
![]() |
d18003a141 | ||
![]() |
d1850c1a97 | ||
![]() |
c9fa84d88e | ||
![]() |
a30c2f4055 | ||
![]() |
5ae9b8b3a3 | ||
![]() |
cdb7c7d147 | ||
![]() |
2da4316e48 | ||
![]() |
313877c6a2 | ||
![]() |
e61ac1a09c | ||
![]() |
ff0f4cfeba | ||
![]() |
1335bf10f6 | ||
![]() |
c8343f0a43 | ||
![]() |
d1e4116427 | ||
![]() |
9baf69af45 | ||
![]() |
918398092c | ||
![]() |
4e2491f066 | ||
![]() |
976e1ff7f9 | ||
![]() |
5e3da0d42b | ||
![]() |
c560680247 | ||
![]() |
f7a147e3b6 | ||
![]() |
8c8cae91ec | ||
![]() |
232331c0d2 | ||
![]() |
4f71473ef1 | ||
![]() |
6625bf200d | ||
![]() |
f562994660 | ||
![]() |
509bcec37b | ||
![]() |
1d83e9bd4b | ||
![]() |
27cef8885d | ||
![]() |
3031b7c4ed | ||
![]() |
695720ebe8 | ||
![]() |
2605043d6d | ||
![]() |
091c9b4316 | ||
![]() |
9634de178d | ||
![]() |
1f7a563ab0 | ||
![]() |
21b08463a7 | ||
![]() |
31ce6e9966 | ||
![]() |
1c11204056 | ||
![]() |
9c2aaac268 | ||
![]() |
d415957dbc | ||
![]() |
4681441d2f | ||
![]() |
9842d29d66 | ||
![]() |
bc6438c092 | ||
![]() |
abefc03f51 | ||
![]() |
c40714cdee | ||
![]() |
7c24a58bdb | ||
![]() |
b85eae0f05 | ||
![]() |
28cc2241e4 | ||
![]() |
8361e7f934 | ||
![]() |
427cc21531 | ||
![]() |
f991dd2bfb | ||
![]() |
35c2dd48d9 | ||
![]() |
3b2fd09596 | ||
![]() |
e35fc5ebc7 | ||
![]() |
dbb1886114 | ||
![]() |
c2ee6fa66a | ||
![]() |
4831ef7fe4 | ||
![]() |
178663df52 | ||
![]() |
ef19739e64 | ||
![]() |
01b517a20a | ||
![]() |
f4c99cd635 | ||
![]() |
e75220b11a | ||
![]() |
2efefddafd | ||
![]() |
bf3c932663 | ||
![]() |
4c78c3d700 | ||
![]() |
2e11e51c04 | ||
![]() |
1a01639bf9 | ||
![]() |
59ca17b1c8 | ||
![]() |
c94c121a99 | ||
![]() |
0c84002650 | ||
![]() |
c5eb75b35a | ||
![]() |
0e2dd3fcbc | ||
![]() |
26a87972a9 | ||
![]() |
33b2218b2f | ||
![]() |
ead467a9c1 | ||
![]() |
0d29751890 | ||
![]() |
25b83c2a0e | ||
![]() |
3fe774722b | ||
![]() |
f4cc2ca503 | ||
![]() |
11ec06de7f | ||
![]() |
53cd37bac5 | ||
![]() |
f856816b94 | ||
![]() |
8af49fc276 | ||
![]() |
9c5f2988b9 | ||
![]() |
afd4985f72 | ||
![]() |
2c53c0ebc6 | ||
![]() |
bbf1defe58 | ||
![]() |
186d185b6e | ||
![]() |
612300a686 | ||
![]() |
ea75382094 | ||
![]() |
e438e81469 | ||
![]() |
0e6f914b3b | ||
![]() |
6ab30ff50b | ||
![]() |
42c971341b | ||
![]() |
ce2fe4c01c | ||
![]() |
a9e03736df | ||
![]() |
e3c1266f49 | ||
![]() |
82e91d20a0 | ||
![]() |
170d644440 | ||
![]() |
5831742840 | ||
![]() |
a277dd33eb | ||
![]() |
0e0bfd334c | ||
![]() |
e6a25fea23 | ||
![]() |
b7df8f90a7 |
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.05.11
|
||||
[debug] youtube-dl version 2019.11.05
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@@ -19,7 +19,7 @@ labels: 'site-support-request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that the site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for a site support request to be accepted, all provided example URLs should not violate any copyrights.
|
||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a new site support request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that none of the provided URLs violate any copyrights
|
||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||
|
@@ -18,13 +18,13 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a site feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||
|
||||
|
||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@@ -18,7 +18,7 @@ title: ''
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a broken site support issue
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2019.05.11
|
||||
[debug] youtube-dl version 2019.11.05
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@@ -19,13 +19,13 @@ labels: 'request'
|
||||
|
||||
<!--
|
||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.05.11. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.11.05. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||
- Finally, put x into all relevant boxes (like this [x])
|
||||
-->
|
||||
|
||||
- [ ] I'm reporting a feature request
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.05.11**
|
||||
- [ ] I've verified that I'm running youtube-dl version **2019.11.05**
|
||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||
|
||||
|
||||
|
@@ -9,6 +9,7 @@ python:
|
||||
- "3.6"
|
||||
- "pypy"
|
||||
- "pypy3"
|
||||
dist: trusty
|
||||
env:
|
||||
- YTDL_TEST_SET=core
|
||||
- YTDL_TEST_SET=download
|
||||
|
@@ -339,6 +339,72 @@ Incorrect:
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Inline values
|
||||
|
||||
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
|
||||
### Collapse fallbacks
|
||||
|
||||
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||
|
||||
#### Example
|
||||
|
||||
Good:
|
||||
|
||||
```python
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
Unwieldy:
|
||||
|
||||
```python
|
||||
description = (
|
||||
self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, default=None)
|
||||
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||
```
|
||||
|
||||
Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||
|
||||
### Trailing parentheses
|
||||
|
||||
Always move trailing parentheses after the last argument.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list)
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list,
|
||||
)
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
437
ChangeLog
437
ChangeLog
@@ -1,3 +1,440 @@
|
||||
version 2019.11.05
|
||||
|
||||
Extractors
|
||||
+ [scte] Add support for learning.scte.org (#22975)
|
||||
+ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
|
||||
* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
|
||||
* [jamendo] Improve extraction
|
||||
* Fix album extraction (#18564)
|
||||
* Improve metadata extraction (#18565, #21379)
|
||||
* [mediaset] Relax URL guid matching (#18352)
|
||||
+ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
|
||||
* [telegraaf] Fix extraction
|
||||
+ [bellmedia] Add support for marilyn.ca videos (#22193)
|
||||
* [stv] Fix extraction (#22928)
|
||||
- [iconosquare] Remove extractor
|
||||
- [keek] Remove extractor
|
||||
- [gameone] Remove extractor (#21778)
|
||||
- [flipagram] Remove extractor
|
||||
- [bambuser] Remove extractor
|
||||
* [wistia] Reduce embed extraction false positives
|
||||
+ [wistia] Add support for inline embeds (#22931)
|
||||
- [go90] Remove extractor
|
||||
* [kakao] Remove raw request
|
||||
+ [kakao] Extract format total bitrate
|
||||
* [daum] Fix VOD and Clip extraction (#15015)
|
||||
* [kakao] Improve extraction
|
||||
+ Add support for embed URLs
|
||||
+ Add support for Kakao Legacy vid based embed URLs
|
||||
* Only extract fields used for extraction
|
||||
* Strip description and extract tags
|
||||
* [mixcloud] Fix cloudcast data extraction (#22821)
|
||||
* [yahoo] Improve extraction
|
||||
+ Add support for live streams (#3597, #3779, #22178)
|
||||
* Bypass cookie consent page for european domains (#16948, #22576)
|
||||
+ Add generic support for embeds (#20332)
|
||||
* [tv2] Fix and improve extraction (#22787)
|
||||
+ [tv2dk] Add support for TV2 DK sites
|
||||
* [onet] Improve extraction
|
||||
+ Add support for onet100.vod.pl
|
||||
+ Extract m3u8 formats
|
||||
* Correct audio only format info
|
||||
* [fox9] Fix extraction
|
||||
|
||||
|
||||
version 2019.10.29
|
||||
|
||||
Core
|
||||
* [utils] Actualize major IPv4 address blocks per country
|
||||
|
||||
Extractors
|
||||
+ [go] Add support for abc.com and freeform.com (#22823, #22864)
|
||||
+ [mtv] Add support for mtvjapan.com
|
||||
* [mtv] Fix extraction for mtv.de (#22113)
|
||||
* [videodetective] Fix extraction
|
||||
* [internetvideoarchive] Fix extraction
|
||||
* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
|
||||
- [hark] Remove extractor
|
||||
- [tutv] Remove extractor
|
||||
- [learnr] Remove extractor
|
||||
- [macgamestore] Remove extractor
|
||||
* [la7] Update Kaltura service URL (#22358)
|
||||
* [thesun] Fix extraction (#16966)
|
||||
- [makertv] Remove extractor
|
||||
+ [tenplay] Add support for 10play.com.au (#21446)
|
||||
* [soundcloud] Improve extraction
|
||||
* Improve format extraction (#22123)
|
||||
+ Extract uploader_id and uploader_url (#21916)
|
||||
+ Extract all known thumbnails (#19071, #20659)
|
||||
* Fix extraction for private playlists (#20976)
|
||||
+ Add support for playlist embeds (#20976)
|
||||
* Skip preview formats (#22806)
|
||||
* [dplay] Improve extraction
|
||||
+ Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
|
||||
* Fix it.dplay.com extraction (#22826)
|
||||
+ Extract creator, tags and thumbnails
|
||||
* Handle playback API call errors
|
||||
+ [discoverynetworks] Add support for dplay.co.uk
|
||||
* [vk] Improve extraction
|
||||
+ Add support for Odnoklassniki embeds
|
||||
+ Extract more videos from user lists (#4470)
|
||||
+ Fix wall post audio extraction (#18332)
|
||||
* Improve error detection (#22568)
|
||||
+ [odnoklassniki] Add support for embeds
|
||||
* [puhutv] Improve extraction
|
||||
* Fix subtitles extraction
|
||||
* Transform HLS URLs to HTTP URLs
|
||||
* Improve metadata extraction
|
||||
* [ceskatelevize] Skip DRM media
|
||||
+ [facebook] Extract subtitles (#22777)
|
||||
* [globo] Handle alternative hash signing method
|
||||
|
||||
|
||||
version 2019.10.22
|
||||
|
||||
Core
|
||||
* [utils] Improve subtitles_filename (#22753)
|
||||
|
||||
Extractors
|
||||
* [facebook] Bypass download rate limits (#21018)
|
||||
+ [contv] Add support for contv.com
|
||||
- [viewster] Remove extractor
|
||||
* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
|
||||
* Update the list of domains
|
||||
+ Add support for aa-encoded video data
|
||||
* Improve jwplayer format extraction
|
||||
+ Add support for Clappr sources
|
||||
* [mangomolo] Fix video format extraction and add support for player URLs
|
||||
* [audioboom] Improve metadata extraction
|
||||
* [twitch] Update VOD URL matching (#22395, #22727)
|
||||
- [mit] Remove support for video.mit.edu (#22403)
|
||||
- [servingsys] Remove extractor (#22639)
|
||||
* [dumpert] Fix extraction (#22428, #22564)
|
||||
* [atresplayer] Fix extraction (#16277, #16716)
|
||||
|
||||
|
||||
version 2019.10.16
|
||||
|
||||
Core
|
||||
* [extractor/common] Make _is_valid_url more relaxed
|
||||
|
||||
Extractors
|
||||
* [vimeo] Improve album videos id extraction (#22599)
|
||||
+ [globo] Extract subtitles (#22713)
|
||||
* [bokecc] Improve player params extraction (#22638)
|
||||
* [nexx] Handle result list (#22666)
|
||||
* [vimeo] Fix VHX embed extraction
|
||||
* [nbc] Switch to graphql API (#18581, #22693, #22701)
|
||||
- [vessel] Remove extractor
|
||||
- [promptfile] Remove extractor (#6239)
|
||||
* [kaltura] Fix service URL extraction (#22658)
|
||||
* [kaltura] Fix embed info strip (#22658)
|
||||
* [globo] Fix format extraction (#20319)
|
||||
* [redtube] Improve metadata extraction (#22492, #22615)
|
||||
* [pornhub:uservideos:upload] Fix extraction (#22619)
|
||||
+ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
|
||||
- [wimp] Remove extractor (#22088, #22091)
|
||||
+ [gfycat] Extend URL regular expression (#22225)
|
||||
+ [chaturbate] Extend URL regular expression (#22309)
|
||||
* [peertube] Update instances (#22414)
|
||||
+ [telequebec] Add support for coucou.telequebec.tv (#22482)
|
||||
+ [xvideos] Extend URL regular expression (#22471)
|
||||
- [youtube] Remove support for invidious.enkirton.net (#22543)
|
||||
+ [openload] Add support for oload.monster (#22592)
|
||||
* [nrktv:seriebase] Fix extraction (#22596)
|
||||
+ [youtube] Add support for yt.lelux.fi (#22597)
|
||||
* [orf:tvthek] Make manifest requests non fatal (#22578)
|
||||
* [teachable] Skip login when already logged in (#22572)
|
||||
* [viewlift] Improve extraction (#22545)
|
||||
* [nonktube] Fix extraction (#22544)
|
||||
|
||||
|
||||
version 2019.09.28
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
|
||||
|
||||
Extractors
|
||||
* [vk] Fix extraction (#22522)
|
||||
* [heise] Fix kaltura embeds extraction (#22514)
|
||||
* [ted] Check for resources validity and extract subtitled downloads (#22513)
|
||||
+ [youtube] Add support for
|
||||
owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
|
||||
+ [nhk] Add support for clips
|
||||
* [nhk] Fix video extraction (#22249, #22353)
|
||||
* [byutv] Fix extraction (#22070)
|
||||
+ [openload] Add support for oload.online (#22304)
|
||||
+ [youtube] Add support for invidious.drycat.fr (#22451)
|
||||
* [jwplatform] Do not match video URLs (#20596, #22148)
|
||||
* [youtube:playlist] Unescape playlist uploader (#22483)
|
||||
+ [bilibili] Add support for audio albums and songs (#21094)
|
||||
+ [instagram] Add support for tv URLs
|
||||
+ [mixcloud] Allow uppercase letters in format URLs (#19280)
|
||||
* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
|
||||
#12842, #13912, #15669, #16303)
|
||||
* [hotstar] Use native HLS downloader by default
|
||||
+ [hotstar] Extract more formats (#22323)
|
||||
* [9now] Fix extraction (#22361)
|
||||
* [zdf] Bypass geo restriction
|
||||
+ [tv4] Extract series metadata
|
||||
* [tv4] Fix extraction (#22443)
|
||||
|
||||
|
||||
version 2019.09.12.1
|
||||
|
||||
Extractors
|
||||
* [youtube] Remove quality and tbr for itag 43 (#22372)
|
||||
|
||||
|
||||
version 2019.09.12
|
||||
|
||||
Extractors
|
||||
* [youtube] Quick extraction tempfix (#22367, #22163)
|
||||
|
||||
|
||||
version 2019.09.01
|
||||
|
||||
Core
|
||||
+ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
|
||||
#21859)
|
||||
+ [downloader/external] Respect mtime option for aria2c (#22242)
|
||||
|
||||
Extractors
|
||||
+ [xhamster:user] Add support for user pages (#16330, #18454)
|
||||
+ [xhamster] Add support for more domains
|
||||
+ [verystream] Add support for woof.tube (#22217)
|
||||
+ [dailymotion] Add support for lequipe.fr (#21328, #22152)
|
||||
+ [openload] Add support for oload.vip (#22205)
|
||||
+ [bbccouk] Extend URL regular expression (#19200)
|
||||
+ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
|
||||
* [safari] Fix authentication (#22161, #22184)
|
||||
* [usanetwork] Fix extraction (#22105)
|
||||
+ [einthusan] Add support for einthusan.ca (#22171)
|
||||
* [youtube] Improve unavailable message extraction (#22117)
|
||||
+ [piksel] Extract subtitles (#20506)
|
||||
|
||||
|
||||
version 2019.08.13
|
||||
|
||||
Core
|
||||
* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
|
||||
* [YoutubeDL] Check annotations availability (#18582)
|
||||
|
||||
Extractors
|
||||
* [youtube:playlist] Improve flat extraction (#21927)
|
||||
* [youtube] Fix annotations extraction (#22045)
|
||||
+ [discovery] Extract series meta field (#21808)
|
||||
* [youtube] Improve error detection (#16445)
|
||||
* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
|
||||
+ [roosterteeth] Add support for watch URLs
|
||||
* [discovery] Limit video data by show slug (#21980)
|
||||
|
||||
|
||||
version 2019.08.02
|
||||
|
||||
Extractors
|
||||
+ [tvigle] Add support for HLS and DASH formats (#21967)
|
||||
* [tvigle] Fix extraction (#21967)
|
||||
+ [yandexvideo] Add support for DASH formats (#21971)
|
||||
* [discovery] Use API call for video data extraction (#21808)
|
||||
+ [mgtv] Extract format_note (#21881)
|
||||
* [tvn24] Fix metadata extraction (#21833, #21834)
|
||||
* [dlive] Relax URL regular expression (#21909)
|
||||
+ [openload] Add support for oload.best (#21913)
|
||||
* [youtube] Improve metadata extraction for age gate content (#21943)
|
||||
|
||||
|
||||
version 2019.07.30
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix and improve title and description extraction (#21934)
|
||||
|
||||
|
||||
version 2019.07.27
|
||||
|
||||
Extractors
|
||||
+ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
|
||||
+ [discovery] Add support for go.discovery.com URLs
|
||||
* [youtube:playlist] Relax video regular expression (#21844)
|
||||
* [generic] Restrict --default-search schemeless URLs detection pattern
|
||||
(#21842)
|
||||
* [vrv] Fix CMS signing query extraction (#21809)
|
||||
|
||||
|
||||
version 2019.07.16
|
||||
|
||||
Extractors
|
||||
+ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
|
||||
(#21281, #21290)
|
||||
* [kaltura] Check source format URL (#21290)
|
||||
* [ctsnews] Fix YouTube embeds extraction (#21678)
|
||||
+ [einthusan] Add support for einthusan.com (#21748, #21775)
|
||||
+ [youtube] Add support for invidious.mastodon.host (#21777)
|
||||
+ [gfycat] Extend URL regular expression (#21779, #21780)
|
||||
* [youtube] Restrict is_live extraction (#21782)
|
||||
|
||||
|
||||
version 2019.07.14
|
||||
|
||||
Extractors
|
||||
* [porn91] Fix extraction (#21312)
|
||||
+ [yandexmusic] Extract track number and disk number (#21421)
|
||||
+ [yandexmusic] Add support for multi disk albums (#21420, #21421)
|
||||
* [lynda] Handle missing subtitles (#20490, #20513)
|
||||
+ [youtube] Add more invidious instances to URL regular expression (#21694)
|
||||
* [twitter] Improve uploader id extraction (#21705)
|
||||
* [spankbang] Fix and improve metadata extraction
|
||||
* [spankbang] Fix extraction (#21763, #21764)
|
||||
+ [dlive] Add support for dlive.tv (#18080)
|
||||
+ [livejournal] Add support for livejournal.com (#21526)
|
||||
* [roosterteeth] Fix free episode extraction (#16094)
|
||||
* [dbtv] Fix extraction
|
||||
* [bellator] Fix extraction
|
||||
- [rudo] Remove extractor (#18430, #18474)
|
||||
* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224)
|
||||
* [bleacherreport] Fix Bleacher Report CMS extraction
|
||||
* [espn] Fix fivethirtyeight.com extraction
|
||||
* [5tv] Relax video URL regular expression and support https URLs
|
||||
* [youtube] Fix is_live extraction (#21734)
|
||||
* [youtube] Fix authentication (#11270)
|
||||
|
||||
|
||||
version 2019.07.12
|
||||
|
||||
Core
|
||||
+ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016)
|
||||
|
||||
Extractors
|
||||
+ [mgtv] Pass Referer HTTP header for format URLs (#21726)
|
||||
+ [beeg] Add support for api/v6 v2 URLs without t argument (#21701)
|
||||
* [voxmedia:volume] Improve vox embed extraction (#16846)
|
||||
* [funnyordie] Move extraction to VoxMedia extractor (#16846)
|
||||
* [gameinformer] Fix extraction (#8895, #15363, #17206)
|
||||
* [funk] Fix extraction (#17915)
|
||||
* [packtpub] Relax lesson URL regular expression (#21695)
|
||||
* [packtpub] Fix extraction (#21268)
|
||||
* [philharmoniedeparis] Relax URL regular expression (#21672)
|
||||
* [peertube] Detect embed URLs in generic extraction (#21666)
|
||||
* [mixer:vod] Relax URL regular expression (#21657, #21658)
|
||||
+ [lecturio] Add support for id based URLs (#21630)
|
||||
+ [go] Add site info for disneynow (#21613)
|
||||
* [ted] Restrict info regular expression (#21631)
|
||||
* [twitch:vod] Actualize m3u8 URL (#21538, #21607)
|
||||
* [vzaar] Fix videos with empty title (#21606)
|
||||
* [tvland] Fix extraction (#21384)
|
||||
* [arte] Clean extractor (#15583, #21614)
|
||||
|
||||
|
||||
version 2019.07.02
|
||||
|
||||
Core
|
||||
+ [utils] Introduce random_user_agent and use as default User-Agent (#21546)
|
||||
|
||||
Extractors
|
||||
+ [vevo] Add support for embed.vevo.com URLs (#21565)
|
||||
+ [openload] Add support for oload.biz (#21574)
|
||||
* [xiami] Update API base URL (#21575)
|
||||
* [yourporn] Fix extraction (#21585)
|
||||
+ [acast] Add support for URLs with episode id (#21444)
|
||||
+ [dailymotion] Add support for DM.player embeds
|
||||
* [soundcloud] Update client id
|
||||
|
||||
|
||||
version 2019.06.27
|
||||
|
||||
Extractors
|
||||
+ [go] Add support for disneynow.com (#21528)
|
||||
* [mixer:vod] Relax URL regular expression (#21531, #21536)
|
||||
* [drtv] Relax URL regular expression
|
||||
* [fusion] Fix extraction (#17775, #21269)
|
||||
- [nfb] Remove extractor (#21518)
|
||||
+ [beeg] Add support for api/v6 v2 URLs (#21511)
|
||||
+ [brightcove:new] Add support for playlists (#21331)
|
||||
+ [openload] Add support for oload.life (#21495)
|
||||
* [vimeo:channel,group] Make title extraction non fatal
|
||||
* [vimeo:likes] Implement extractor in terms of channel extractor (#21493)
|
||||
+ [pornhub] Add support for more paged video sources
|
||||
+ [pornhub] Add support for downloading single pages and search pages (#15570)
|
||||
* [pornhub] Rework extractors (#11922, #16078, #17454, #17936)
|
||||
+ [youtube] Add another signature function pattern
|
||||
* [tf1] Fix extraction (#21365, #21372)
|
||||
* [crunchyroll] Move Accept-Language workaround to video extractor since
|
||||
it causes playlists not to list any videos
|
||||
* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443)
|
||||
|
||||
|
||||
version 2019.06.21
|
||||
|
||||
Core
|
||||
* [utils] Restrict parse_codecs and add theora as known vcodec (#21381)
|
||||
|
||||
Extractors
|
||||
* [youtube] Update signature function patterns (#21469, #21476)
|
||||
* [youtube] Make --write-annotations non fatal (#21452)
|
||||
+ [sixplay] Add support for rtlmost.hu (#21405)
|
||||
* [youtube] Hardcode codec metadata for av01 video only formats (#21381)
|
||||
* [toutv] Update client key (#21370)
|
||||
+ [biqle] Add support for new embed domain
|
||||
* [cbs] Improve DRM protected videos detection (#21339)
|
||||
|
||||
|
||||
version 2019.06.08
|
||||
|
||||
Core
|
||||
* [downloader/common] Improve rate limit (#21301)
|
||||
* [utils] Improve strip_or_none
|
||||
* [extractor/common] Strip src attribute for HTML5 entries code (#18485,
|
||||
#21169)
|
||||
|
||||
Extractors
|
||||
* [ted] Fix playlist extraction (#20844, #21032)
|
||||
* [vlive:playlist] Fix video extraction when no playlist is found (#20590)
|
||||
+ [vlive] Add CH+ support (#16887, #21209)
|
||||
+ [openload] Add support for oload.website (#21329)
|
||||
+ [tvnow] Extract HD formats (#21201)
|
||||
+ [redbulltv] Add support for rrn:content URLs (#21297)
|
||||
* [youtube] Fix average rating extraction (#21304)
|
||||
+ [bitchute] Extract HTML5 formats (#21306)
|
||||
* [cbsnews] Fix extraction (#9659, #15397)
|
||||
* [vvvvid] Relax URL regular expression (#21299)
|
||||
+ [prosiebensat1] Add support for new API (#21272)
|
||||
+ [vrv] Extract adaptive_hls formats (#21243)
|
||||
* [viki] Switch to HTTPS (#21001)
|
||||
* [LiveLeak] Check if the original videos exist (#21206, #21208)
|
||||
* [rtp] Fix extraction (#15099)
|
||||
* [youtube] Improve DRM protected videos detection (#1774)
|
||||
+ [srgssrplay] Add support for popupvideoplayer URLs (#21155)
|
||||
+ [24video] Add support for porno.24video.net (#21194)
|
||||
+ [24video] Add support for 24video.site (#21193)
|
||||
- [pornflip] Remove extractor
|
||||
- [criterion] Remove extractor (#21195)
|
||||
* [pornhub] Use HTTPS (#21061)
|
||||
* [bitchute] Fix uploader extraction (#21076)
|
||||
* [streamcloud] Reduce waiting time to 6 seconds (#21092)
|
||||
- [novamov] Remove extractors (#21077)
|
||||
+ [openload] Add support for oload.press (#21135)
|
||||
* [vivo] Fix extraction (#18906, #19217)
|
||||
|
||||
|
||||
version 2019.05.20
|
||||
|
||||
Core
|
||||
+ [extractor/common] Move workaround for applying first Set-Cookie header
|
||||
into a separate _apply_first_set_cookie_header method
|
||||
|
||||
Extractors
|
||||
* [safari] Fix authentication (#21090)
|
||||
* [vk] Use _apply_first_set_cookie_header
|
||||
* [vrt] Fix extraction (#20527)
|
||||
+ [canvas] Add support for vrtnieuws and sporza site ids and extract
|
||||
AES HLS formats
|
||||
+ [vrv] Extract captions (#19238)
|
||||
* [tele5] Improve video id extraction
|
||||
* [tele5] Relax URL regular expression (#21020, #21063)
|
||||
* [svtplay] Update API URL (#21075)
|
||||
+ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
|
||||
|
||||
|
||||
version 2019.05.11
|
||||
|
||||
Core
|
||||
|
70
README.md
70
README.md
@@ -752,8 +752,8 @@ As a last resort, you can also uninstall the version installed by your package m
|
||||
Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
|
||||
|
||||
```
|
||||
sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+x /usr/local/bin/youtube-dl
|
||||
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
hash -r
|
||||
```
|
||||
|
||||
@@ -1216,6 +1216,72 @@ Incorrect:
|
||||
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
|
||||
```
|
||||
|
||||
### Inline values
|
||||
|
||||
Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
|
||||
### Collapse fallbacks
|
||||
|
||||
Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
|
||||
|
||||
#### Example
|
||||
|
||||
Good:
|
||||
|
||||
```python
|
||||
description = self._html_search_meta(
|
||||
['og:description', 'description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
```
|
||||
|
||||
Unwieldy:
|
||||
|
||||
```python
|
||||
description = (
|
||||
self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, default=None)
|
||||
or self._html_search_meta('twitter:description', webpage, default=None))
|
||||
```
|
||||
|
||||
Methods supporting a list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
|
||||
|
||||
### Trailing parentheses
|
||||
|
||||
Always move trailing parentheses after the last argument.
|
||||
|
||||
#### Example
|
||||
|
||||
Correct:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list)
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
|
||||
list,
|
||||
)
|
||||
```
|
||||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
@@ -58,16 +58,8 @@
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
- **arte.tv:cinema**
|
||||
- **arte.tv:concert**
|
||||
- **arte.tv:creative**
|
||||
- **arte.tv:ddc**
|
||||
- **arte.tv:embed**
|
||||
- **arte.tv:future**
|
||||
- **arte.tv:info**
|
||||
- **arte.tv:magazine**
|
||||
- **arte.tv:playlist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
@@ -78,15 +70,12 @@
|
||||
- **AudioBoom**
|
||||
- **audiomack**
|
||||
- **audiomack:album**
|
||||
- **auroravid**: AuroraVid
|
||||
- **AWAAN**
|
||||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:weekly**
|
||||
@@ -107,6 +96,8 @@
|
||||
- **Bigflix**
|
||||
- **Bild**: Bild.de
|
||||
- **BiliBili**
|
||||
- **BilibiliAudio**
|
||||
- **BilibiliAudioAlbum**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
@@ -150,6 +141,7 @@
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
- **cbsnews**: CBS News
|
||||
- **cbsnews:embed**
|
||||
- **cbsnews:livevideo**: CBS News Live Videos
|
||||
- **CBSSports**
|
||||
- **CCMA**
|
||||
@@ -174,7 +166,6 @@
|
||||
- **Clipsyndicate**
|
||||
- **CloserToTruth**
|
||||
- **CloudflareStream**
|
||||
- **cloudtime**: CloudTime
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
@@ -190,11 +181,11 @@
|
||||
- **ComedyCentralShortname**
|
||||
- **ComedyCentralTV**
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
- **Corus**
|
||||
- **Coub**
|
||||
- **Cracked**
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
@@ -233,11 +224,12 @@
|
||||
- **DiscoveryNetworksDe**
|
||||
- **DiscoveryVR**
|
||||
- **Disney**
|
||||
- **dlive:stream**
|
||||
- **dlive:vod**
|
||||
- **Dotsub**
|
||||
- **DouyuShow**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DPlay**
|
||||
- **DPlayIt**
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
@@ -290,12 +282,12 @@
|
||||
- **FiveThirtyEight**
|
||||
- **FiveTV**
|
||||
- **Flickr**
|
||||
- **Flipagram**
|
||||
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
|
||||
- **FootyRoom**
|
||||
- **Formula1**
|
||||
- **FOX**
|
||||
- **FOX9**
|
||||
- **FOX9News**
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
@@ -315,16 +307,12 @@
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **Funimation**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
- **FunnyOrDie**
|
||||
- **Funk**
|
||||
- **Fusion**
|
||||
- **Fux**
|
||||
- **FXNetworks**
|
||||
- **Gaia**
|
||||
- **GameInformer**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gaskrank**
|
||||
@@ -339,14 +327,12 @@
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
- **Go**
|
||||
- **Go90**
|
||||
- **GodTube**
|
||||
- **Golem**
|
||||
- **GoogleDrive**
|
||||
- **Goshgay**
|
||||
- **GPUTechConf**
|
||||
- **Groupon**
|
||||
- **Hark**
|
||||
- **hbo**
|
||||
- **HearThisAt**
|
||||
- **Heise**
|
||||
@@ -375,7 +361,6 @@
|
||||
- **Hungama**
|
||||
- **HungamaSong**
|
||||
- **Hypem**
|
||||
- **Iconosquare**
|
||||
- **ign.com**
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
@@ -415,7 +400,6 @@
|
||||
- **Kankan**
|
||||
- **Karaoketv**
|
||||
- **KarriereVideos**
|
||||
- **keek**
|
||||
- **KeezMovies**
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
@@ -439,7 +423,6 @@
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lecturio**
|
||||
- **LecturioCourse**
|
||||
@@ -460,6 +443,7 @@
|
||||
- **linkedin:learning:course**
|
||||
- **LinuxAcademy**
|
||||
- **LiTV**
|
||||
- **LiveJournal**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
- **livestream**
|
||||
@@ -472,11 +456,9 @@
|
||||
- **lynda**: lynda.com videos
|
||||
- **lynda:course**: lynda.com online courses
|
||||
- **m6**
|
||||
- **macgamestore**: MacGameStore trailers
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakerTV**
|
||||
- **MallTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
@@ -532,8 +514,8 @@
|
||||
- **mtg**: MTG services
|
||||
- **mtv**
|
||||
- **mtv.de**
|
||||
- **mtv81**
|
||||
- **mtv:video**
|
||||
- **mtvjapan**
|
||||
- **mtvservices:embedded**
|
||||
- **MuenchenTV**: münchen.tv
|
||||
- **MusicPlayOn**
|
||||
@@ -583,7 +565,6 @@
|
||||
- **NextTV**: 壹電視
|
||||
- **Nexx**
|
||||
- **NexxEmbed**
|
||||
- **nfb**: National Film Board of Canada
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
- **nhl.com**
|
||||
@@ -609,7 +590,6 @@
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
- **nowvideo**: NowVideo
|
||||
- **Noz**
|
||||
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **npo.nl:live**
|
||||
@@ -693,17 +673,16 @@
|
||||
- **PopcornTV**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **PornFlip**
|
||||
- **PornHd**
|
||||
- **PornHub**: PornHub and Thumbzilla
|
||||
- **PornHubPlaylist**
|
||||
- **PornHubUserVideos**
|
||||
- **PornHubPagedVideoList**
|
||||
- **PornHubUser**
|
||||
- **PornHubUserVideosUpload**
|
||||
- **Pornotube**
|
||||
- **PornoVoisines**
|
||||
- **PornoXO**
|
||||
- **PornTube**
|
||||
- **PressTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
@@ -734,6 +713,7 @@
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
- **RedBullTVRrnContent**
|
||||
- **Reddit**
|
||||
- **RedditR**
|
||||
- **RedTube**
|
||||
@@ -767,7 +747,6 @@
|
||||
- **rtve.es:television**
|
||||
- **RTVNH**
|
||||
- **RTVS**
|
||||
- **Rudo**
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
- **rutube:channel**: Rutube channels
|
||||
@@ -791,10 +770,11 @@
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **scrippsnetworks:watch**
|
||||
- **SCTE**
|
||||
- **SCTECourse**
|
||||
- **Seeker**
|
||||
- **SenateISVP**
|
||||
- **SendtoNews**
|
||||
- **ServingSys**
|
||||
- **Servus**
|
||||
- **Sexu**
|
||||
- **SeznamZpravy**
|
||||
@@ -825,6 +805,7 @@
|
||||
- **soundcloud:set**
|
||||
- **soundcloud:trackstation**
|
||||
- **soundcloud:user**
|
||||
- **SoundcloudEmbed**
|
||||
- **soundgasm**
|
||||
- **soundgasm:profile**
|
||||
- **southpark.cc.com**
|
||||
@@ -893,13 +874,14 @@
|
||||
- **TeleQuebec**
|
||||
- **TeleQuebecEmission**
|
||||
- **TeleQuebecLive**
|
||||
- **TeleQuebecSquat**
|
||||
- **TeleTask**
|
||||
- **Telewebion**
|
||||
- **TennisTV**
|
||||
- **TenPlay**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TheIntercept**
|
||||
- **theoperaplatform**
|
||||
- **ThePlatform**
|
||||
- **ThePlatformFeed**
|
||||
- **TheScene**
|
||||
@@ -935,11 +917,11 @@
|
||||
- **tunein:topic**
|
||||
- **TunePk**
|
||||
- **Turbo**
|
||||
- **Tutv**
|
||||
- **tv.dfb.de**
|
||||
- **TV2**
|
||||
- **tv2.hu**
|
||||
- **TV2Article**
|
||||
- **TV2DK**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TVA**
|
||||
@@ -1001,7 +983,6 @@
|
||||
- **VeeHD**
|
||||
- **Veoh**
|
||||
- **verystream**
|
||||
- **Vessel**
|
||||
- **Vesti**: Вести.Ru
|
||||
- **Vevo**
|
||||
- **VevoPlaylist**
|
||||
@@ -1016,7 +997,6 @@
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.google:search**: Google Video search
|
||||
- **video.mit.edu**
|
||||
- **VideoDetective**
|
||||
- **videofy.me**
|
||||
- **videomore**
|
||||
@@ -1024,7 +1004,6 @@
|
||||
- **videomore:video**
|
||||
- **VideoPremium**
|
||||
- **VideoPress**
|
||||
- **videoweed**: VideoWeed
|
||||
- **Vidio**
|
||||
- **VidLii**
|
||||
- **vidme**
|
||||
@@ -1035,7 +1014,6 @@
|
||||
- **vier:videos**
|
||||
- **ViewLift**
|
||||
- **ViewLiftEmbed**
|
||||
- **Viewster**
|
||||
- **Viidea**
|
||||
- **viki**
|
||||
- **viki:channel**
|
||||
@@ -1071,7 +1049,7 @@
|
||||
- **VoxMediaVolume**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **Vrak**
|
||||
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
|
||||
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
|
||||
- **VrtNU**: VrtNU.be
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
@@ -1101,8 +1079,6 @@
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wholecloud**: WholeCloud
|
||||
- **Wimp**
|
||||
- **Wistia**
|
||||
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
- **WorldStarHipHop**
|
||||
@@ -1111,9 +1087,10 @@
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
- **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XHamsterUser**
|
||||
- **xiami:album**: 虾米音乐 - 专辑
|
||||
- **xiami:artist**: 虾米音乐 - 歌手
|
||||
- **xiami:collection**: 虾米音乐 - 精选集
|
||||
@@ -1131,6 +1108,7 @@
|
||||
- **Yahoo**: Yahoo screen and movies
|
||||
- **yahoo:gyao**
|
||||
- **yahoo:gyao:player**
|
||||
- **yahoo:japannews**: Yahoo! Japan News
|
||||
- **YandexDisk**
|
||||
- **yandexmusic:album**: Яндекс.Музыка - Альбом
|
||||
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
|
||||
|
@@ -123,12 +123,6 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
|
||||
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
|
||||
|
||||
def test_yahoo_https(self):
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/2701
|
||||
self.assertMatch(
|
||||
'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
|
||||
['Yahoo'])
|
||||
|
||||
def test_no_duplicated_ie_names(self):
|
||||
name_accu = collections.defaultdict(list)
|
||||
for ie in self.ies:
|
||||
|
@@ -73,6 +73,8 @@ from youtube_dl.utils import (
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
subtitles_filename,
|
||||
timeconvert,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
@@ -260,6 +262,11 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||
|
||||
def test_subtitles_filename(self):
|
||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
||||
self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
|
||||
|
||||
def test_remove_start(self):
|
||||
self.assertEqual(remove_start(None, 'A - '), None)
|
||||
self.assertEqual(remove_start('A - B', 'A - '), 'B')
|
||||
@@ -752,6 +759,18 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_strip_or_none(self):
|
||||
self.assertEqual(strip_or_none(' abc'), 'abc')
|
||||
self.assertEqual(strip_or_none('abc '), 'abc')
|
||||
self.assertEqual(strip_or_none(' abc '), 'abc')
|
||||
self.assertEqual(strip_or_none('\tabc\t'), 'abc')
|
||||
self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
|
||||
self.assertEqual(strip_or_none('abc'), 'abc')
|
||||
self.assertEqual(strip_or_none(''), '')
|
||||
self.assertEqual(strip_or_none(None), None)
|
||||
self.assertEqual(strip_or_none(42), None)
|
||||
self.assertEqual(strip_or_none([]), None)
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
@@ -809,6 +828,15 @@ class TestUtil(unittest.TestCase):
|
||||
'vcodec': 'av01.0.05M.08',
|
||||
'acodec': 'none',
|
||||
})
|
||||
self.assertEqual(parse_codecs('theora, vorbis'), {
|
||||
'vcodec': 'theora',
|
||||
'acodec': 'vorbis',
|
||||
})
|
||||
self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
|
||||
'vcodec': 'unknownvcodec',
|
||||
'acodec': 'unknownacodec',
|
||||
})
|
||||
self.assertEqual(parse_codecs('unknown'), {})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
|
@@ -852,8 +852,9 @@ class YoutubeDL(object):
|
||||
extract_flat = self.params.get('extract_flat', False)
|
||||
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
|
||||
or extract_flat is True):
|
||||
if self.params.get('forcejson', False):
|
||||
self.to_stdout(json.dumps(ie_result))
|
||||
self.__forced_printings(
|
||||
ie_result, self.prepare_filename(ie_result),
|
||||
incomplete=True)
|
||||
return ie_result
|
||||
|
||||
if result_type == 'video':
|
||||
@@ -1693,6 +1694,36 @@ class YoutubeDL(object):
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
def __forced_printings(self, info_dict, filename, incomplete):
|
||||
def print_mandatory(field):
|
||||
if (self.params.get('force%s' % field, False)
|
||||
and (not incomplete or info_dict.get(field) is not None)):
|
||||
self.to_stdout(info_dict[field])
|
||||
|
||||
def print_optional(field):
|
||||
if (self.params.get('force%s' % field, False)
|
||||
and info_dict.get(field) is not None):
|
||||
self.to_stdout(info_dict[field])
|
||||
|
||||
print_mandatory('title')
|
||||
print_mandatory('id')
|
||||
if self.params.get('forceurl', False) and not incomplete:
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
for f in info_dict['requested_formats']:
|
||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||
else:
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
print_optional('thumbnail')
|
||||
print_optional('description')
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
self.to_stdout(filename)
|
||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||
print_mandatory('format')
|
||||
if self.params.get('forcejson', False):
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
|
||||
def process_info(self, info_dict):
|
||||
"""Process a single resolved IE result."""
|
||||
|
||||
@@ -1703,9 +1734,8 @@ class YoutubeDL(object):
|
||||
if self._num_downloads >= int(max_downloads):
|
||||
raise MaxDownloadsReached()
|
||||
|
||||
# TODO: backward compatibility, to be removed
|
||||
info_dict['fulltitle'] = info_dict['title']
|
||||
if len(info_dict['title']) > 200:
|
||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
@@ -1720,29 +1750,7 @@ class YoutubeDL(object):
|
||||
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
|
||||
|
||||
# Forced printings
|
||||
if self.params.get('forcetitle', False):
|
||||
self.to_stdout(info_dict['fulltitle'])
|
||||
if self.params.get('forceid', False):
|
||||
self.to_stdout(info_dict['id'])
|
||||
if self.params.get('forceurl', False):
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
for f in info_dict['requested_formats']:
|
||||
self.to_stdout(f['url'] + f.get('play_path', ''))
|
||||
else:
|
||||
# For RTMP URLs, also include the playpath
|
||||
self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
|
||||
if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
|
||||
self.to_stdout(info_dict['thumbnail'])
|
||||
if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
|
||||
self.to_stdout(info_dict['description'])
|
||||
if self.params.get('forcefilename', False) and filename is not None:
|
||||
self.to_stdout(filename)
|
||||
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||
if self.params.get('forceformat', False):
|
||||
self.to_stdout(info_dict['format'])
|
||||
if self.params.get('forcejson', False):
|
||||
self.to_stdout(json.dumps(info_dict))
|
||||
self.__forced_printings(info_dict, filename, incomplete=False)
|
||||
|
||||
# Do nothing else if in simulate mode
|
||||
if self.params.get('simulate', False):
|
||||
@@ -1783,6 +1791,8 @@ class YoutubeDL(object):
|
||||
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
|
||||
self.to_screen('[info] Video annotations are already present')
|
||||
elif not info_dict.get('annotations'):
|
||||
self.report_warning('There are no annotations to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||
@@ -1804,7 +1814,7 @@ class YoutubeDL(object):
|
||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
||||
for sub_lang, sub_info in subtitles.items():
|
||||
sub_format = sub_info['ext']
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
|
||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
|
||||
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
|
||||
else:
|
||||
|
@@ -94,7 +94,7 @@ def _real_main(argv=None):
|
||||
if opts.verbose:
|
||||
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
|
||||
except IOError:
|
||||
sys.exit('ERROR: batch file could not be read')
|
||||
sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
|
||||
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
|
||||
_enc = preferredencoding()
|
||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||
|
@@ -176,7 +176,9 @@ class FileDownloader(object):
|
||||
return
|
||||
speed = float(byte_counter) / elapsed
|
||||
if speed > rate_limit:
|
||||
time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
|
||||
sleep_time = float(byte_counter) / rate_limit - elapsed
|
||||
if sleep_time > 0:
|
||||
time.sleep(sleep_time)
|
||||
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
|
@@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD):
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
# whole download to fail. However if the same fragment is immediately
|
||||
# retried with the same request data this usually succeeds (1-2 attemps
|
||||
# retried with the same request data this usually succeeds (1-2 attempts
|
||||
# is usually enough) thus allowing to download the whole file successfully.
|
||||
# To be future-proof we will retry all fragments that fail with any
|
||||
# HTTP error.
|
||||
|
@@ -194,6 +194,7 @@ class Aria2cFD(ExternalFD):
|
||||
cmd += self._option('--interface', 'source_address')
|
||||
cmd += self._option('--all-proxy', 'proxy')
|
||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
||||
|
@@ -190,12 +190,13 @@ class FragmentFD(FileDownloader):
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
|
||||
'downloaded_bytes': resume_len,
|
||||
'fragment_index': ctx['fragment_index'],
|
||||
'fragment_count': total_frags,
|
||||
'filename': ctx['filename'],
|
||||
@@ -234,8 +235,8 @@ class FragmentFD(FileDownloader):
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(
|
||||
start, time_now, estimated_size,
|
||||
state['downloaded_bytes'])
|
||||
start, time_now, estimated_size - resume_len,
|
||||
state['downloaded_bytes'] - resume_len)
|
||||
state['speed'] = s.get('speed') or ctx.get('speed')
|
||||
ctx['speed'] = state['speed']
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
|
@@ -146,7 +146,7 @@ def write_piff_header(stream, params):
|
||||
sps, pps = codec_private_data.split(u32.pack(1))[1:]
|
||||
avcc_payload = u8.pack(1) # configuration version
|
||||
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
|
||||
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
|
||||
avcc_payload += u16.pack(len(sps))
|
||||
avcc_payload += sps
|
||||
|
@@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE):
|
||||
IE_NAME = 'abcnews:video'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
abcnews\.go\.com/
|
||||
(?:
|
||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
||||
video/embed\?.*?\bid=
|
||||
)|
|
||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
|
@@ -7,6 +7,7 @@ import functools
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
@@ -27,7 +28,7 @@ class ACastIE(InfoExtractor):
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||
'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
|
||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
||||
'info_dict': {
|
||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'ext': 'mp3',
|
||||
@@ -46,28 +47,37 @@ class ACastIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||
s = self._download_json(
|
||||
'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
||||
display_id)
|
||||
media_url = s['url']
|
||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
||||
episode_url = s.get('episodeUrl')
|
||||
if episode_url:
|
||||
display_id = episode_url
|
||||
else:
|
||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
||||
cast_data = self._download_json(
|
||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
||||
display_id)['result']
|
||||
e = cast_data['episode']
|
||||
title = e['name']
|
||||
title = e.get('name') or s['title']
|
||||
return {
|
||||
'id': compat_str(e['id']),
|
||||
'display_id': display_id,
|
||||
'url': media_url,
|
||||
'title': title,
|
||||
'description': e.get('description') or e.get('summary'),
|
||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
||||
'thumbnail': e.get('image'),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate')),
|
||||
'duration': float_or_none(s.get('duration') or e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
||||
'filesize': int_or_none(e.get('contentLength')),
|
||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
||||
|
@@ -25,6 +25,11 @@ MSO_INFO = {
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'ATT': {
|
||||
'name': 'AT&T U-verse',
|
||||
'username_field': 'userid',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'ATTOTT': {
|
||||
'name': 'DIRECTV NOW',
|
||||
'username_field': 'email',
|
||||
|
@@ -4,17 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
qualities,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
@@ -25,59 +18,7 @@ from ..utils import (
|
||||
# add tests.
|
||||
|
||||
|
||||
class ArteTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
|
||||
IE_NAME = 'arte.tv'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
lang = mobj.group('lang')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
|
||||
ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
|
||||
ref_xml_doc = self._download_xml(
|
||||
ref_xml_url, video_id, note='Downloading metadata')
|
||||
config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
|
||||
config_xml_url = config_node.attrib['ref']
|
||||
config = self._download_xml(
|
||||
config_xml_url, video_id, note='Downloading configuration')
|
||||
|
||||
formats = [{
|
||||
'format_id': q.attrib['quality'],
|
||||
# The playpath starts at 'mp4:', if we don't manually
|
||||
# split the url, rtmpdump will incorrectly parse them
|
||||
'url': q.text.split('mp4:', 1)[0],
|
||||
'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
|
||||
'ext': 'flv',
|
||||
'quality': 2 if q.attrib['quality'] == 'hd' else 1,
|
||||
} for q in config.findall('./urls/url')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = config.find('.//name').text
|
||||
thumbnail = config.find('.//firstThumbnailUrl').text
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ArteTVBaseIE(InfoExtractor):
|
||||
@classmethod
|
||||
def _extract_url_info(cls, url):
|
||||
mobj = re.match(cls._VALID_URL, url)
|
||||
lang = mobj.group('lang')
|
||||
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
if 'vid' in query:
|
||||
video_id = query['vid'][0]
|
||||
else:
|
||||
# This is not a real id, it can be for example AJT for the news
|
||||
# http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
|
||||
video_id = mobj.group('id')
|
||||
return video_id, lang
|
||||
|
||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
info = self._download_json(json_url, video_id)
|
||||
player_info = info['videoJsonPlayer']
|
||||
@@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
'upload_date': unified_strdate(upload_date_str),
|
||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||
}
|
||||
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
|
||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||
|
||||
LANGS = {
|
||||
'fr': 'F',
|
||||
'de': 'A',
|
||||
'en': 'E[ANG]',
|
||||
'es': 'E[ESP]',
|
||||
'it': 'E[ITA]',
|
||||
'pl': 'E[POL]',
|
||||
}
|
||||
|
||||
langcode = LANGS.get(lang, lang)
|
||||
@@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
l = re.escape(langcode)
|
||||
|
||||
# Language preference from most to least priority
|
||||
# Reference: section 5.6.3 of
|
||||
# http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
|
||||
# Reference: section 6.8 of
|
||||
# https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
|
||||
PREFERENCES = (
|
||||
# original version in requested language, without subtitles
|
||||
r'VO{0}$'.format(l),
|
||||
@@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'info_dict': {
|
||||
'id': '088501-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mexico: Stealing Petrol to Survive',
|
||||
'upload_date': '20190628',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self._extract_from_webpage(webpage, video_id, lang)
|
||||
|
||||
def _extract_from_webpage(self, webpage, video_id, lang):
|
||||
patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
|
||||
ids = (video_id, '')
|
||||
# some pages contain multiple videos (like
|
||||
# http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
|
||||
# so we first try to look for json URLs that contain the video id from
|
||||
# the 'vid' parameter.
|
||||
patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
|
||||
json_url = self._html_search_regex(
|
||||
patterns, webpage, 'json vp url', default=None)
|
||||
if not json_url:
|
||||
def find_iframe_url(webpage, default=NO_DEFAULT):
|
||||
return self._html_search_regex(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
|
||||
webpage, 'iframe url', group='url', default=default)
|
||||
|
||||
iframe_url = find_iframe_url(webpage, None)
|
||||
if not iframe_url:
|
||||
embed_url = self._html_search_regex(
|
||||
r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
|
||||
if embed_url:
|
||||
player = self._download_json(
|
||||
embed_url, video_id, 'Downloading player page')
|
||||
iframe_url = find_iframe_url(player['html'])
|
||||
# en and es URLs produce react-based pages with different layout (e.g.
|
||||
# http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
|
||||
if not iframe_url:
|
||||
program = self._search_regex(
|
||||
r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
|
||||
webpage, 'program', default=None)
|
||||
if program:
|
||||
embed_html = self._parse_json(program, video_id)
|
||||
if embed_html:
|
||||
iframe_url = find_iframe_url(embed_html['embed_html'])
|
||||
if iframe_url:
|
||||
json_url = compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
|
||||
if json_url:
|
||||
title = self._search_regex(
|
||||
r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
|
||||
webpage, 'title', default=None, group='title')
|
||||
return self._extract_from_json_url(json_url, video_id, lang, title=title)
|
||||
# Different kind of embed URL (e.g.
|
||||
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
|
||||
entries = [
|
||||
self.url_result(url)
|
||||
for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)]
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
# It also uses the arte_vp_url url from the webpage to extract the information
|
||||
class ArteTVCreativeIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:creative'
|
||||
_VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1',
|
||||
'info_dict': {
|
||||
'id': '057405-001-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)',
|
||||
'upload_date': '20150716',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
|
||||
'playlist_count': 11,
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVInfoIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:info'
|
||||
_VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
|
||||
'info_dict': {
|
||||
'id': '067528-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Service civique, un cache misère ?',
|
||||
'upload_date': '20160403',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVFutureIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:future'
|
||||
_VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses',
|
||||
'info_dict': {
|
||||
'id': '050940-028-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Les écrevisses aussi peuvent être anxieuses',
|
||||
'upload_date': '20140902',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVDDCIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:ddc'
|
||||
_VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, lang = self._extract_url_info(url)
|
||||
if lang == 'folge':
|
||||
lang = 'de'
|
||||
elif lang == 'emission':
|
||||
lang = 'fr'
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
|
||||
script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
|
||||
javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
|
||||
json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class ArteTVConcertIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:concert'
|
||||
_VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
|
||||
'md5': '9ea035b7bd69696b67aa2ccaaa218161',
|
||||
'info_dict': {
|
||||
'id': '186',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
|
||||
'upload_date': '20140128',
|
||||
'description': 'md5:486eb08f991552ade77439fe6d82c305',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVCinemaIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:cinema'
|
||||
_VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck',
|
||||
'md5': 'a5b9dd5575a11d93daf0e3f404f45438',
|
||||
'info_dict': {
|
||||
'id': '062494-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck',
|
||||
'upload_date': '20150807',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVMagazineIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:magazine'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..."
|
||||
'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium',
|
||||
'md5': '2a9369bcccf847d1c741e51416299f25',
|
||||
'info_dict': {
|
||||
'id': '065965-000-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trepalium - Extrait Ep.01',
|
||||
'upload_date': '20160121',
|
||||
},
|
||||
}, {
|
||||
# Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium"
|
||||
'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium',
|
||||
'md5': 'fedc64fc7a946110fe311634e79782ca',
|
||||
'info_dict': {
|
||||
'id': '054813-004_PLUS7-F',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trepalium (4/6)',
|
||||
'description': 'md5:10057003c34d54e95350be4f9b05cb40',
|
||||
'upload_date': '20160218',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis',
|
||||
'only_matching': True,
|
||||
}]
|
||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(
|
||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
||||
video_id, lang)
|
||||
|
||||
|
||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'arte.tv:embed'
|
||||
_VALID_URL = r'''(?x)
|
||||
http://www\.arte\.tv
|
||||
/(?:playerv2/embed|arte_vp/index)\.php\?json_url=
|
||||
https://www\.arte\.tv
|
||||
/player/v3/index\.php\?json_url=
|
||||
(?P<json_url>
|
||||
http://arte\.tv/papi/tvguide/videos/stream/player/
|
||||
(?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
|
||||
https?://api\.arte\.tv/api/player/v1/config/
|
||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
||||
)
|
||||
'''
|
||||
|
||||
_TESTS = []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang')
|
||||
json_url = mobj.group('json_url')
|
||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
||||
return self._extract_from_json_url(json_url, video_id, lang)
|
||||
|
||||
|
||||
class TheOperaPlatformIE(ArteTVPlus7IE):
|
||||
IE_NAME = 'theoperaplatform'
|
||||
_VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
|
||||
'md5': '970655901fa2e82e04c00b955e9afe7b',
|
||||
'info_dict': {
|
||||
'id': '060338-009-A',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verdi - OTELLO',
|
||||
'upload_date': '20160927',
|
||||
},
|
||||
}]
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'info_dict': {
|
||||
'id': 'PL-013263',
|
||||
'title': 'Areva & Uramin',
|
||||
'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
|
||||
'id': 'RC-016954',
|
||||
'title': 'Earn a Living',
|
||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, lang = self._extract_url_info(url)
|
||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||
collection = self._download_json(
|
||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
||||
% (lang, playlist_id), playlist_id)
|
||||
|
@@ -5,14 +5,12 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
)
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class AsianCrushIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
@@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor):
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:3db14e9186197857e7063522cb89a805',
|
||||
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
@@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor):
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
||||
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
@@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor):
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
||||
video_title=title)
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
||||
webpage, 'description', fatal=False)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
|
||||
_TEST = {
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
||||
'info_dict': {
|
||||
'id': '12481',
|
||||
@@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
||||
},
|
||||
'playlist_count': 20,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
|
||||
' | AsianCrush')
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
|
@@ -1,202 +1,118 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import time
|
||||
import hmac
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
|
||||
'md5': 'efd56753cda1bb64df52a3074f62e38a',
|
||||
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-10-especial-solidario-nochebuena',
|
||||
'id': '5d4aa2c57ed1a88fc715a615',
|
||||
'ext': 'mp4',
|
||||
'title': 'Especial Solidario de Nochebuena',
|
||||
'description': 'md5:e2d52ff12214fa937107d21064075bf1',
|
||||
'duration': 5527.6,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'title': 'Capítulo 7: Asuntos pendientes',
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
|
||||
'md5': '6e52cbb513c405e403dbacb7aacf8747',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-112-david-bustamante',
|
||||
'ext': 'flv',
|
||||
'title': 'David Bustamante',
|
||||
'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
|
||||
'duration': 1439.0,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
_USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
|
||||
_MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
|
||||
_TIMESTAMP_SHIFT = 30000
|
||||
|
||||
_TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
|
||||
_URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
|
||||
_PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
|
||||
_EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
|
||||
|
||||
_LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
|
||||
|
||||
_ERRORS = {
|
||||
'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
|
||||
'DELETED': 'This video has expired and is no longer available for online streaming.',
|
||||
'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
|
||||
# 'PREMIUM': 'PREMIUM',
|
||||
}
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _real_initialize(self):
    """Run the login step once before any extraction takes place."""
    self._login()
|
||||
|
||||
def _handle_error(self, e, code):
    """Turn an HTTP error response from the API into an extractor error.

    Must be called from inside an ``except`` block: when the error is not
    the one this call site expects, the exception currently being handled
    is re-raised with a bare ``raise``.

    :param e: the caught exception; its ``cause`` attribute is inspected.
    :param code: HTTP status code whose JSON body should be interpreted.
    :raises ExtractorError: with the API's ``error_description`` when the
        status matches ``code``.
    """
    cause = e.cause
    if not isinstance(cause, compat_HTTPError) or cause.code != code:
        # Not the status this call site knows how to decode: propagate
        # the active exception untouched.
        raise

    error = self._parse_json(cause.read(), None)
    if error.get('error') == 'required_registered':
        # The API demands a registered account for this content.
        self.raise_login_required()
    raise ExtractorError(error['error_description'], expected=True)
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'j_username': username,
|
||||
'j_password': password,
|
||||
}
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
request = sanitized_Request(
|
||||
self._LOGIN_URL, urlencode_postdata(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in')
|
||||
try:
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
}, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 400)
|
||||
|
||||
error = self._html_search_regex(
|
||||
r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
|
||||
response, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
try:
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
except ExtractorError as e:
|
||||
self._handle_error(e, 403)
|
||||
|
||||
episode_id = self._search_regex(
|
||||
r'episode="([^"]+)"', webpage, 'episode id')
|
||||
|
||||
request = sanitized_Request(
|
||||
self._PLAYER_URL_TEMPLATE % episode_id,
|
||||
headers={'User-Agent': self._USER_AGENT})
|
||||
player = self._download_json(request, episode_id, 'Downloading player JSON')
|
||||
|
||||
episode_type = player.get('typeOfEpisode')
|
||||
error_message = self._ERRORS.get(episode_type)
|
||||
if error_message:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
title = episode['titulo']
|
||||
|
||||
formats = []
|
||||
video_url = player.get('urlVideo')
|
||||
if video_url:
|
||||
format_info = {
|
||||
'url': video_url,
|
||||
'format_id': 'http',
|
||||
}
|
||||
mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
|
||||
if mobj:
|
||||
format_info.update({
|
||||
'width': int_or_none(mobj.group('width')),
|
||||
'height': int_or_none(mobj.group('height')),
|
||||
'tbr': int_or_none(mobj.group('bitrate')),
|
||||
})
|
||||
formats.append(format_info)
|
||||
|
||||
timestamp = int_or_none(self._download_webpage(
|
||||
self._TIME_API_URL,
|
||||
video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
|
||||
timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
|
||||
token = hmac.new(
|
||||
self._MAGIC.encode('ascii'),
|
||||
(episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
|
||||
).hexdigest()
|
||||
|
||||
request = sanitized_Request(
|
||||
self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
|
||||
headers={'User-Agent': self._USER_AGENT})
|
||||
|
||||
fmt_json = self._download_json(
|
||||
request, video_id, 'Downloading windows video JSON')
|
||||
|
||||
result = fmt_json.get('resultDes')
|
||||
if result.lower() != 'ok':
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, result), expected=True)
|
||||
|
||||
for format_id, video_url in fmt_json['resultObject'].items():
|
||||
if format_id == 'token' or not video_url.startswith('http'):
|
||||
for source in episode.get('sources', []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
if 'geodeswowsmpra3player' in video_url:
|
||||
# f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
|
||||
# f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
|
||||
# these videos are protected by DRM; the f4m downloader doesn't support them
|
||||
continue
|
||||
video_url_hd = video_url.replace('free_es', 'es')
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
|
||||
fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
|
||||
fatal=False))
|
||||
src_type = source.get('type')
|
||||
if src_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
path_data = player.get('pathData')
|
||||
|
||||
episode = self._download_xml(
|
||||
self._EPISODE_URL_TEMPLATE % path_data, video_id,
|
||||
'Downloading episode XML')
|
||||
|
||||
duration = float_or_none(xpath_text(
|
||||
episode, './media/asset/info/technical/contentDuration', 'duration'))
|
||||
|
||||
art = episode.find('./media/asset/info/art')
|
||||
title = xpath_text(art, './name', 'title')
|
||||
description = xpath_text(art, './description', 'description')
|
||||
thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
|
||||
|
||||
subtitles = {}
|
||||
subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
|
||||
if subtitle_url:
|
||||
subtitles['es'] = [{
|
||||
'ext': 'srt',
|
||||
'url': subtitle_url,
|
||||
}]
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'description': episode.get('descripcion'),
|
||||
'thumbnail': episode.get('imgPoster'),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'channel': get_meta('channel'),
|
||||
'season': get_meta('season'),
|
||||
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||
}
|
||||
|
@@ -2,22 +2,25 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AudioBoomIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
|
||||
'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
|
||||
'md5': '7b00192e593ff227e6a315486979a42d',
|
||||
'info_dict': {
|
||||
'id': '4279833',
|
||||
'id': '7398103',
|
||||
'ext': 'mp3',
|
||||
'title': '3/09/2016 Czaban Hour 3',
|
||||
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
|
||||
'duration': 2245.72,
|
||||
'uploader': 'SB Nation A.M.',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
||||
'title': 'Asim Chaudhry',
|
||||
'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
|
||||
'duration': 4000.99,
|
||||
'uploader': 'Sue Perkins: An hour or so with...',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
@@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor):
|
||||
clip = None
|
||||
|
||||
clip_store = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
|
||||
self._html_search_regex(
|
||||
r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
|
||||
webpage, 'clip store', default='{}', group='json'),
|
||||
video_id, fatal=False)
|
||||
if clip_store:
|
||||
@@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor):
|
||||
|
||||
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
|
||||
'audio', webpage, 'audio url')
|
||||
title = from_clip('title') or self._og_search_title(webpage)
|
||||
description = from_clip('description') or self._og_search_description(webpage)
|
||||
title = from_clip('title') or self._html_search_meta(
|
||||
['og:title', 'og:audio:title', 'audio_title'], webpage)
|
||||
description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
|
||||
|
||||
duration = float_or_none(from_clip('duration') or self._html_search_meta(
|
||||
'weibo:audio:duration', webpage))
|
||||
|
||||
uploader = from_clip('author') or self._og_search_property(
|
||||
'audio:artist', webpage, 'uploader', fatal=False)
|
||||
uploader = from_clip('author') or self._html_search_meta(
|
||||
['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
|
||||
uploader_url = from_clip('author_url') or self._html_search_meta(
|
||||
'audioboo:channel', webpage, 'uploader url')
|
||||
|
||||
|
@@ -1,142 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BambuserIE(InfoExtractor):
    """Information extractor for a single Bambuser broadcast.

    Handles ``bambuser.com/v/<numeric id>`` URLs. If credentials are
    available it submits the site's login form first, then reads the
    broadcast metadata from the ``getVideo.json`` player API.
    """

    IE_NAME = 'bambuser'
    _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
    _API_KEY = '005f64509e19a868399060af746a00aa'
    _LOGIN_URL = 'https://bambuser.com/user'
    _NETRC_MACHINE = 'bambuser'

    _TEST = {
        'url': 'http://bambuser.com/v/4050584',
        # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
        # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
        'info_dict': {
            'id': '4050584',
            'ext': 'flv',
            'title': 'Education engineering days - lightning talks',
            'duration': 3741,
            'uploader': 'pixelversity',
            'uploader_id': '344706',
            'timestamp': 1382976692,
            'upload_date': '20131028',
            'view_count': int,
        },
        'params': {
            # It doesn't respect the 'Range' header, it would download the whole video
            # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
            'skip_download': True,
        },
    }

    def _login(self):
        """Submit the login form when a username is configured.

        Does nothing when ``_get_login_info()`` yields no username.

        :raises ExtractorError: when the response page contains a
            ``messages error`` block; its text is included in the message.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_form = {
            'form_id': 'user_login',
            'op': 'Log in',
            'name': username,
            'pass': password,
        }

        request = sanitized_Request(
            self._LOGIN_URL, urlencode_postdata(login_form))
        request.add_header('Referer', self._LOGIN_URL)
        response = self._download_webpage(
            request, None, 'Logging in')

        login_error = self._html_search_regex(
            r'(?s)<div class="messages error">(.+?)</div>',
            response, 'login error', default=None)
        if login_error:
            raise ExtractorError(
                'Unable to login: %s' % login_error, expected=True)

    def _real_initialize(self):
        """Log in (if credentials are configured) before extracting."""
        self._login()

    def _real_extract(self, url):
        """Return the info dict for the broadcast addressed by ``url``.

        :raises ExtractorError: when the API response carries an ``error``
            entry.
        """
        video_id = self._match_id(url)

        info = self._download_json(
            'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
            % (self._API_KEY, video_id), video_id)

        error = info.get('error')
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        result = info['result']

        # ``owner`` may be absent or null, and so may its ``uid``. Only
        # stringify a real value: compat_str(None) would otherwise publish
        # the literal text 'None' as the uploader id.
        owner = result.get('owner') or {}
        uploader_uid = owner.get('uid')

        return {
            'id': video_id,
            'title': result['title'],
            'url': result['url'],
            'thumbnail': result.get('preview'),
            'duration': int_or_none(result.get('length')),
            'uploader': result.get('username'),
            'uploader_id': compat_str(uploader_uid) if uploader_uid is not None else None,
            'timestamp': int_or_none(result.get('created')),
            'fps': float_or_none(result.get('framerate')),
            'view_count': int_or_none(result.get('views_total')),
            'comment_count': int_or_none(result.get('comment_count')),
        }
|
||||
|
||||
|
||||
class BambuserChannelIE(InfoExtractor):
    """Playlist extractor listing every broadcast of a Bambuser channel.

    The channel is walked page by page through the site's ``xhr-api``
    endpoint; each page asks for broadcasts older than the last ``vid``
    seen on the previous page, until an empty page is returned.
    """

    IE_NAME = 'bambuser:channel'
    _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
    # The maximum number we can get with each request
    _STEP = 50
    _TEST = {
        'url': 'http://bambuser.com/channel/pixelversity',
        'info_dict': {
            'title': 'pixelversity',
        },
        'playlist_mincount': 60,
    }

    def _real_extract(self, url):
        """Collect url results for all broadcasts of the channel in ``url``."""
        user = re.match(self._VALID_URL, url).group('user')
        api_template = (
            'http://bambuser.com/xhr-api/index.php?username={user}'
            '&sort=created&access_mode=0%2C1%2C2&limit={count}'
            '&method=broadcast&format=json&vid_older_than={last}'
        )

        entries = []
        oldest_vid = ''
        page_num = 0
        while True:
            page_num += 1
            request = sanitized_Request(api_template.format(
                user=user, count=self._STEP, last=oldest_vid))
            # Without setting this header, we wouldn't get any result
            request.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
            broadcasts = self._download_json(
                request, user, 'Downloading page %d' % page_num)['result']
            if not broadcasts:
                break
            # The next request continues below the oldest broadcast seen.
            oldest_vid = broadcasts[-1]['vid']
            for broadcast in broadcasts:
                entries.append(self.url_result(broadcast['page'], 'Bambuser'))

        return {
            '_type': 'playlist',
            'title': user,
            'entries': entries,
        }
|
@@ -40,6 +40,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/(?:clips|audiovideo/popular)[/#]|
|
||||
radio/player/|
|
||||
sounds/play/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
@@ -70,7 +71,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'b039d07m',
|
||||
'ext': 'flv',
|
||||
'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
|
||||
'title': 'Kaleidoscope, Leonard Cohen',
|
||||
'description': 'The Canadian poet and songwriter reflects on his musical career.',
|
||||
},
|
||||
'params': {
|
||||
@@ -220,6 +221,20 @@ class BBCCoUkIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'm0007jz9',
|
||||
'ext': 'mp4',
|
||||
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
||||
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
||||
'duration': 9840,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
@@ -609,7 +624,7 @@ class BBCIE(BBCCoUkIE):
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
|
@@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE):
|
||||
|
||||
class BeamProVodIE(BeamProBaseIE):
|
||||
IE_NAME = 'Mixer:vod'
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
||||
'info_dict': {
|
||||
@@ -119,7 +119,13 @@ class BeamProVodIE(BeamProBaseIE):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_format(vod, vod_type):
|
||||
|
@@ -1,7 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
@@ -11,6 +14,7 @@ from ..utils import (
|
||||
class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# api/v6 v1
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'info_dict': {
|
||||
@@ -24,6 +28,14 @@ class BeegIE(InfoExtractor):
|
||||
'tags': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# api/v6 v2
|
||||
'url': 'https://beeg.com/1941093077?t=911-1391',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# api/v6 v2 w/o t
|
||||
'url': 'https://beeg.com/1277207756',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beeg.porn/video/5416503',
|
||||
'only_matching': True,
|
||||
@@ -41,11 +53,25 @@ class BeegIE(InfoExtractor):
|
||||
r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
|
||||
default='1546225636701')
|
||||
|
||||
if len(video_id) >= 10:
|
||||
query = {
|
||||
'v': 2,
|
||||
}
|
||||
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||
t = qs.get('t', [''])[0].split('-')
|
||||
if len(t) > 1:
|
||||
query.update({
|
||||
's': t[0],
|
||||
'e': t[1],
|
||||
})
|
||||
else:
|
||||
query = {'v': 1}
|
||||
|
||||
for api_path in ('', 'api.'):
|
||||
video = self._download_json(
|
||||
'https://%sbeeg.com/api/v6/%s/video/%s'
|
||||
% (api_path, beeg_version, video_id), video_id,
|
||||
fatal=api_path == 'api.')
|
||||
fatal=api_path == 'api.', query=query)
|
||||
if video:
|
||||
break
|
||||
|
||||
|
@@ -22,7 +22,8 @@ class BellMediaIE(InfoExtractor):
|
||||
bravo|
|
||||
mtv|
|
||||
space|
|
||||
etalk
|
||||
etalk|
|
||||
marilyn
|
||||
)\.ca|
|
||||
much\.com
|
||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
@@ -70,6 +71,7 @@ class BellMediaIE(InfoExtractor):
|
||||
'animalplanet': 'aniplan',
|
||||
'etalk': 'ctv',
|
||||
'bnnbloomberg': 'bnn',
|
||||
'marilyn': 'ctv_marilyn',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -15,6 +15,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
@@ -306,3 +307,115 @@ class BiliBiliBangumiIE(InfoExtractor):
|
||||
return self.playlist_result(
|
||||
entries, bangumi_id,
|
||||
season_info.get('bangumi_title'), season_info.get('evaluate'))
|
||||
|
||||
|
||||
class BilibiliAudioBaseIE(InfoExtractor):
    """Shared helper for the Bilibili audio extractors."""

    def _call_api(self, path, sid, query=None):
        """Fetch ``path`` from the audio web API and return its ``data`` member.

        :param path: endpoint path appended to the API base URL.
        :param sid: identifier passed to the downloader as the video id and,
            when ``query`` is empty, sent as the ``sid`` query parameter.
        :param query: optional query parameters replacing the default.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(
            api_url, sid, query=query or {'sid': sid})
        return response['data']
|
||||
|
||||
|
||||
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for single Bilibili audio tracks (``/audio/au<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict from the ``url`` and ``song/info`` API calls."""
        au_id = self._match_id(url)

        # Stream location: the first entry of ``cdns`` is used as the URL.
        stream = self._call_api('url', au_id)
        formats = [{
            'url': stream['cdns'][0],
            'filesize': int_or_none(stream.get('size')),
        }]

        # Track metadata.
        song = self._call_api('song/info', au_id)
        title = song['title']
        stats = song.get('statistic') or {}

        # A lyric URL, when present, is exposed as the only subtitle track.
        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(stats.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(stats.get('play')),
        }
|
||||
|
||||
|
||||
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Playlist extractor for Bilibili audio albums (``/audio/am<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return a playlist with one url result per song of the album.

        Album title and description are attached only when at least one
        song was found and the ``menu/info`` call reports a title.
        """
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are dropped.
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid]

        if not entries:
            return self.playlist_result(entries, am_id)

        album = self._call_api('menu/info', am_id) or {}
        album_title = album.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        # Propagate the album name to every track of the playlist.
        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album.get('intro'))
|
||||
|
@@ -6,7 +6,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
remove_end,
|
||||
)
|
||||
from .rudo import RudoIE
|
||||
|
||||
|
||||
class BioBioChileTVIE(InfoExtractor):
|
||||
@@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
|
||||
'info_dict': {
|
||||
'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
|
||||
'id': 'b4xd0LK3SK',
|
||||
'ext': 'mp4',
|
||||
'uploader': '(none)',
|
||||
'upload_date': '20160708',
|
||||
'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
|
||||
# TODO: fix url_transparent information overriding
|
||||
# 'uploader': 'Juan Pablo Echenique',
|
||||
'title': 'Comentario Oscar Cáceres',
|
||||
},
|
||||
'params': {
|
||||
# empty m3u8 manifest
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
|
||||
@@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
rudo_url = RudoIE._extract_url(webpage)
|
||||
rudo_url = self._search_regex(
|
||||
r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
|
||||
webpage, 'embed URL', None, group='url')
|
||||
if not rudo_url:
|
||||
raise ExtractorError('No videos found')
|
||||
|
||||
@@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
|
@@ -42,7 +42,7 @@ class BIQLEIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_url = self._proto_relative_url(self._search_regex(
|
||||
r'<iframe.+?src="((?:https?:)?//daxab\.com/[^"]+)".*?></iframe>',
|
||||
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
|
||||
webpage, 'embed url'))
|
||||
if VKIE.suitable(embed_url):
|
||||
return self.url_result(embed_url, VKIE.ie_key(), video_id)
|
||||
|
@@ -55,6 +55,11 @@ class BitChuteIE(InfoExtractor):
|
||||
formats = [
|
||||
{'url': format_url}
|
||||
for format_url in orderedSet(format_urls)]
|
||||
|
||||
if not formats:
|
||||
formats = self._parse_html5_media_entries(
|
||||
url, webpage, video_id)[0]['formats']
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
@@ -65,8 +70,9 @@ class BitChuteIE(InfoExtractor):
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:image:src', webpage, 'thumbnail')
|
||||
uploader = self._html_search_regex(
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
|
||||
'uploader', fatal=False)
|
||||
(r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
|
||||
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
video = article_data.get('video')
|
||||
if video:
|
||||
video_type = video['type']
|
||||
if video_type == 'cms.bleacherreport.com':
|
||||
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
elif video_type == 'ooyala.com':
|
||||
info['url'] = 'ooyala:%s' % video['id']
|
||||
@@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor):
|
||||
|
||||
|
||||
class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
@@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
|
||||
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
@@ -11,8 +11,8 @@ from ..utils import ExtractorError
|
||||
class BokeCCBaseIE(InfoExtractor):
|
||||
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
|
||||
player_params_str = self._html_search_regex(
|
||||
r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
|
||||
webpage, 'player params')
|
||||
r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
|
||||
webpage, 'player params', group='query')
|
||||
|
||||
player_params = compat_parse_qs(player_params_str)
|
||||
|
||||
@@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE):
|
||||
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
|
||||
'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
|
||||
'info_dict': {
|
||||
'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
|
||||
'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
|
||||
'ext': 'flv',
|
||||
'title': 'BokeCC Video',
|
||||
},
|
||||
|
@@ -2,7 +2,6 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
|
||||
@@ -11,14 +10,12 @@ from .adobepass import AdobePassIE
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_urlparse,
|
||||
compat_xml_parse_error,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
@@ -27,18 +24,19 @@ from ..utils import (
|
||||
js_to_json,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
clean_html,
|
||||
mimetype2ext,
|
||||
UnsupportedError,
|
||||
)
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
IE_NAME = 'brightcove:legacy'
|
||||
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
|
||||
_FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -55,7 +53,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'timestamp': 1368213670,
|
||||
'upload_date': '20130510',
|
||||
'uploader_id': '1589608506001',
|
||||
}
|
||||
},
|
||||
'skip': 'The player has been deactivated by the content owner',
|
||||
},
|
||||
{
|
||||
# From http://medianetwork.oracle.com/video/player/1785452137001
|
||||
@@ -70,6 +69,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'upload_date': '20120814',
|
||||
'uploader_id': '1460825906',
|
||||
},
|
||||
'skip': 'video not playable',
|
||||
},
|
||||
{
|
||||
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
|
||||
@@ -79,7 +79,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'This Bracelet Acts as a Personal Thermostat',
|
||||
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
|
||||
'uploader': 'Mashable',
|
||||
# 'uploader': 'Mashable',
|
||||
'timestamp': 1382041798,
|
||||
'upload_date': '20131017',
|
||||
'uploader_id': '1130468786001',
|
||||
@@ -124,6 +124,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'id': '3550319591001',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
'skip': 'Unsupported URL',
|
||||
},
|
||||
{
|
||||
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
||||
@@ -133,6 +134,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'title': 'Lesson 08',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
'skip': 'Unsupported URL',
|
||||
},
|
||||
{
|
||||
# playerID inferred from bcpid
|
||||
@@ -141,12 +143,6 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'only_matching': True, # Tested in GenericIE
|
||||
}
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
2: 'ON2',
|
||||
3: 'H264',
|
||||
4: 'VP8',
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _build_brighcove_url(cls, object_str):
|
||||
@@ -238,7 +234,8 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _make_brightcove_url(cls, params):
|
||||
return update_url_query(cls._FEDERATED_URL, params)
|
||||
return update_url_query(
|
||||
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||
|
||||
@classmethod
|
||||
def _extract_brightcove_url(cls, webpage):
|
||||
@@ -297,38 +294,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
videoPlayer = query.get('@videoPlayer')
|
||||
if videoPlayer:
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
|
||||
video_id = videoPlayer[0]
|
||||
if 'playerID' not in query:
|
||||
mobj = re.search(r'/bcpid(\d+)', url)
|
||||
if mobj is not None:
|
||||
query['playerID'] = [mobj.group(1)]
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query, referer=referer)
|
||||
elif 'playerKey' in query:
|
||||
player_key = query['playerKey']
|
||||
return self._get_playlist_info(player_key[0])
|
||||
else:
|
||||
raise ExtractorError(
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
if linkBase is not None:
|
||||
referer = linkBase[0]
|
||||
if referer is not None:
|
||||
headers['Referer'] = referer
|
||||
webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
|
||||
|
||||
error_msg = self._html_search_regex(
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
@@ -339,6 +310,9 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
@@ -349,141 +323,21 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
self.report_extraction(video_id)
|
||||
info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
|
||||
info = json.loads(info)['data']
|
||||
video_info = info['programmedContent']['videoPlayer']['mediaDTO']
|
||||
video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
|
||||
|
||||
return self._extract_video_info(video_info)
|
||||
|
||||
def _get_playlist_info(self, player_key):
|
||||
info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
|
||||
playlist_info = self._download_webpage(
|
||||
info_url, player_key, 'Downloading playlist information')
|
||||
|
||||
json_data = json.loads(playlist_info)
|
||||
if 'videoList' in json_data:
|
||||
playlist_info = json_data['videoList']
|
||||
playlist_dto = playlist_info['mediaCollectionDTO']
|
||||
elif 'playlistTabs' in json_data:
|
||||
playlist_info = json_data['playlistTabs']
|
||||
playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
|
||||
else:
|
||||
raise ExtractorError('Empty playlist')
|
||||
|
||||
videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
|
||||
|
||||
return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
|
||||
playlist_title=playlist_dto['displayName'])
|
||||
|
||||
def _extract_video_info(self, video_info):
|
||||
video_id = compat_str(video_info['id'])
|
||||
publisher_id = video_info.get('publisherId')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video_info['displayName'].strip(),
|
||||
'description': video_info.get('shortDescription'),
|
||||
'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
|
||||
'uploader': video_info.get('publisherName'),
|
||||
'uploader_id': compat_str(publisher_id) if publisher_id else None,
|
||||
'duration': float_or_none(video_info.get('length'), 1000),
|
||||
'timestamp': int_or_none(video_info.get('creationDate'), 1000),
|
||||
}
|
||||
|
||||
renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
|
||||
if renditions:
|
||||
formats = []
|
||||
for rend in renditions:
|
||||
url = rend['defaultURL']
|
||||
if not url:
|
||||
continue
|
||||
ext = None
|
||||
if rend['remote']:
|
||||
url_comp = compat_urllib_parse_urlparse(url)
|
||||
if url_comp.path.endswith('.m3u8'):
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
elif 'akamaihd.net' in url_comp.netloc:
|
||||
# This type of renditions are served through
|
||||
# akamaihd.net, but they don't use f4m manifests
|
||||
url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
|
||||
ext = 'flv'
|
||||
if ext is None:
|
||||
ext = determine_ext(url)
|
||||
tbr = int_or_none(rend.get('encodingRate'), 1000)
|
||||
a_format = {
|
||||
'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
|
||||
'url': url,
|
||||
'ext': ext,
|
||||
'filesize': int_or_none(rend.get('size')) or None,
|
||||
'tbr': tbr,
|
||||
}
|
||||
if rend.get('audioOnly'):
|
||||
a_format.update({
|
||||
'vcodec': 'none',
|
||||
})
|
||||
else:
|
||||
a_format.update({
|
||||
'height': int_or_none(rend.get('frameHeight')),
|
||||
'width': int_or_none(rend.get('frameWidth')),
|
||||
'vcodec': rend.get('videoCodec'),
|
||||
})
|
||||
|
||||
# m3u8 manifests with remote == false are media playlists
|
||||
# Not calling _extract_m3u8_formats here to save network traffic
|
||||
if ext == 'm3u8':
|
||||
a_format.update({
|
||||
'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
|
||||
formats.append(a_format)
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
elif video_info.get('FLVFullLengthURL') is not None:
|
||||
info.update({
|
||||
'url': video_info['FLVFullLengthURL'],
|
||||
'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
|
||||
'filesize': int_or_none(video_info.get('FLVFullSize')),
|
||||
})
|
||||
|
||||
if self._downloader.params.get('include_ads', False):
|
||||
adServerURL = video_info.get('_youtubedl_adServerURL')
|
||||
if adServerURL:
|
||||
ad_info = {
|
||||
'_type': 'url',
|
||||
'url': adServerURL,
|
||||
}
|
||||
if 'url' in info:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'title': info['title'],
|
||||
'entries': [ad_info, info],
|
||||
}
|
||||
else:
|
||||
return ad_info
|
||||
|
||||
if not info.get('url') and not info.get('formats'):
|
||||
uploader_id = info.get('uploader_id')
|
||||
if uploader_id:
|
||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
if publisher_id:
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
if referer:
|
||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
# TODO: figure out if it's possible to extract playlistId from playerKey
|
||||
# elif 'playerKey' in query:
|
||||
# player_key = query['playerKey']
|
||||
# return self._get_playlist_info(player_key[0])
|
||||
raise UnsupportedError(url)
|
||||
|
||||
|
||||
class BrightcoveNewIE(AdobePassIE):
|
||||
IE_NAME = 'brightcove:new'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
|
||||
'md5': 'c8100925723840d4b0d243f7025703be',
|
||||
@@ -516,6 +370,21 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# playlist stream
|
||||
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
|
||||
'info_dict': {
|
||||
'id': '5718313430001',
|
||||
'title': 'No Audio Playlist',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# ref: prefixed video id
|
||||
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
|
||||
@@ -715,7 +584,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
|
||||
})
|
||||
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
||||
@@ -736,7 +605,7 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
|
||||
webpage, 'policy key', group='pk')
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
||||
headers = {
|
||||
'Accept': 'application/json;pk=%s' % policy_key,
|
||||
}
|
||||
@@ -771,5 +640,12 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'tveToken': tve_token,
|
||||
})
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
return self._parse_brightcove_metadata(
|
||||
json_data, video_id, headers=headers)
|
||||
|
@@ -3,7 +3,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BYUtvIE(InfoExtractor):
|
||||
@@ -51,7 +56,7 @@ class BYUtvIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
info = self._download_json(
|
||||
video = self._download_json(
|
||||
'https://api.byutv.org/api3/catalog/getvideosforcontent',
|
||||
display_id, query={
|
||||
'contentid': video_id,
|
||||
@@ -62,7 +67,7 @@ class BYUtvIE(InfoExtractor):
|
||||
'x-byutv-platformkey': 'xsaaw9c7y5',
|
||||
})
|
||||
|
||||
ep = info.get('ooyalaVOD')
|
||||
ep = video.get('ooyalaVOD')
|
||||
if ep:
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -75,18 +80,38 @@ class BYUtvIE(InfoExtractor):
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
}
|
||||
|
||||
ep = info['dvr']
|
||||
title = ep['title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
info = {}
|
||||
formats = []
|
||||
for format_id, ep in video.items():
|
||||
if not isinstance(ep, dict):
|
||||
continue
|
||||
video_url = url_or_none(ep.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
merge_dicts(info, {
|
||||
'title': ep.get('title'),
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
'duration': parse_duration(ep.get('length')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': ep.get('description'),
|
||||
'thumbnail': ep.get('imageThumbnail'),
|
||||
'duration': parse_duration(ep.get('length')),
|
||||
'title': display_id,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@@ -17,7 +17,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class CanvasIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
||||
@@ -35,6 +35,10 @@ class CanvasIE(InfoExtractor):
|
||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||
'HLS': 'm3u8_native',
|
||||
'HLS_AES': 'm3u8',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -52,9 +56,9 @@ class CanvasIE(InfoExtractor):
|
||||
format_url, format_type = target.get('url'), target.get('type')
|
||||
if not format_url or not format_type:
|
||||
continue
|
||||
if format_type == 'HLS':
|
||||
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
|
||||
m3u8_id=format_type, fatal=False))
|
||||
elif format_type == 'HDS':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
|
@@ -69,7 +69,7 @@ class CBSIE(CBSBaseIE):
|
||||
last_e = None
|
||||
for item in items_data.findall('.//item'):
|
||||
asset_type = xpath_text(item, 'assetType')
|
||||
if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'):
|
||||
if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
|
||||
continue
|
||||
asset_types.append(asset_type)
|
||||
query = {
|
||||
|
@@ -1,40 +1,62 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import zlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .cbs import CBSIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class CBSNewsEmbedIE(CBSIE):
|
||||
IE_NAME = 'cbsnews:embed'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4
xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
||||
compat_urllib_parse_unquote(self._match_id(url))),
|
||||
-zlib.MAX_WBITS), None)['video']['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
class CBSNewsIE(CBSIE):
|
||||
IE_NAME = 'cbsnews'
|
||||
IE_DESC = 'CBS News'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
# 60 minutes
|
||||
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
|
||||
'info_dict': {
|
||||
'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
|
||||
'ext': 'mp4',
|
||||
'title': 'Artificial Intelligence',
|
||||
'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
|
||||
'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
|
||||
'ext': 'flv',
|
||||
'title': 'Artificial Intelligence, real-life applications',
|
||||
'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1606,
|
||||
'duration': 317,
|
||||
'uploader': 'CBSI-NEW',
|
||||
'timestamp': 1498431900,
|
||||
'upload_date': '20170625',
|
||||
'timestamp': 1476046464,
|
||||
'upload_date': '20161009',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
|
||||
'info_dict': {
|
||||
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
|
||||
'ext': 'mp4',
|
||||
@@ -60,37 +82,29 @@ class CBSNewsIE(CBSIE):
|
||||
# 48 hours
|
||||
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
|
||||
'info_dict': {
|
||||
'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cold as Ice',
|
||||
'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
|
||||
'upload_date': '20170604',
|
||||
'timestamp': 1496538000,
|
||||
'uploader': 'CBSI-NEW',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_info = self._parse_json(self._html_search_regex(
|
||||
r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
|
||||
webpage, 'video JSON info', default='{}'), video_id, fatal=False)
|
||||
|
||||
if video_info:
|
||||
item = video_info['item'] if 'item' in video_info else video_info
|
||||
else:
|
||||
state = self._parse_json(self._search_regex(
|
||||
r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
|
||||
'playlist JSON info', group='json'), video_id)['state']
|
||||
item = state['playlist'][state['pid']]
|
||||
entries = []
|
||||
for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
|
||||
entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||
playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
|
||||
|
||||
item = self._parse_json(self._html_search_regex(
|
||||
r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
|
||||
webpage, 'video JSON info'), display_id)['items'][0]
|
||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||
|
||||
|
||||
|
@@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
is_live = item.get('type') == 'LIVE'
|
||||
formats = []
|
||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||
if 'drmOnly=true' in stream_url:
|
||||
continue
|
||||
if 'playerType=flash' in stream_url:
|
||||
stream_formats = self._extract_m3u8_formats(
|
||||
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
||||
|
@@ -7,7 +7,7 @@ from ..utils import ExtractorError
|
||||
|
||||
|
||||
class ChaturbateIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.chaturbate.com/siswet19/',
|
||||
'info_dict': {
|
||||
@@ -21,6 +21,9 @@ class ChaturbateIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Room is offline',
|
||||
}, {
|
||||
'url': 'https://chaturbate.com/fullvideo/?b=caylin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
@@ -32,7 +35,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers=self.geo_verification_headers())
|
||||
'https://chaturbate.com/%s/' % video_id, video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
m3u8_urls = []
|
||||
|
||||
|
@@ -67,6 +67,7 @@ from ..utils import (
|
||||
sanitized_Request,
|
||||
sanitize_filename,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
@@ -219,7 +220,7 @@ class InfoExtractor(object):
|
||||
* "preference" (optional, int) - quality of the image
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
* "resolution" (optional, string "{width}x{height"},
|
||||
* "resolution" (optional, string "{width}x{height}",
|
||||
deprecated)
|
||||
* "filesize" (optional, int)
|
||||
thumbnail: Full URL to a video thumbnail image.
|
||||
@@ -1423,12 +1424,10 @@ class InfoExtractor(object):
|
||||
try:
|
||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||
return True
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_urllib_error.URLError):
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||
return False
|
||||
raise
|
||||
except ExtractorError:
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
||||
return False
|
||||
|
||||
def http_scheme(self):
|
||||
""" Either "http:" or "https:", depending on the user's preferences """
|
||||
@@ -2480,7 +2479,7 @@ class InfoExtractor(object):
|
||||
'subtitles': {},
|
||||
}
|
||||
media_attributes = extract_attributes(media_tag)
|
||||
src = media_attributes.get('src')
|
||||
src = strip_or_none(media_attributes.get('src'))
|
||||
if src:
|
||||
_, formats = _media_formats(src, media_type)
|
||||
media_info['formats'].extend(formats)
|
||||
@@ -2490,7 +2489,7 @@ class InfoExtractor(object):
|
||||
s_attr = extract_attributes(source_tag)
|
||||
# data-video-src and data-src are non standard but seen
|
||||
# several times in the wild
|
||||
src = dict_get(s_attr, ('src', 'data-video-src', 'data-src'))
|
||||
src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
|
||||
if not src:
|
||||
continue
|
||||
f = parse_content_type(s_attr.get('type'))
|
||||
@@ -2533,7 +2532,7 @@ class InfoExtractor(object):
|
||||
track_attributes = extract_attributes(track_tag)
|
||||
kind = track_attributes.get('kind')
|
||||
if not kind or kind in ('subtitles', 'captions'):
|
||||
src = track_attributes.get('src')
|
||||
src = strip_or_none(track_attributes.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
|
||||
@@ -2817,6 +2816,33 @@ class InfoExtractor(object):
|
||||
self._downloader.cookiejar.add_cookie_header(req)
|
||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
||||
|
||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||
"""
|
||||
Apply first Set-Cookie header instead of the last. Experimental.
|
||||
|
||||
Some sites (e.g. [1-3]) may serve two cookies under the same name
|
||||
in Set-Cookie header and expect the first (old) one to be set rather
|
||||
than second (new). However, as of RFC6265 the newer one cookie
|
||||
should be set into cookie store what actually happens.
|
||||
We will workaround this issue by resetting the cookie to
|
||||
the first one manually.
|
||||
1. https://new.vk.com/
|
||||
2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
|
||||
3. https://learning.oreilly.com/
|
||||
"""
|
||||
for header, cookies in url_handle.headers.items():
|
||||
if header.lower() != 'set-cookie':
|
||||
continue
|
||||
if sys.version_info[0] >= 3:
|
||||
cookies = cookies.encode('iso-8859-1')
|
||||
cookies = cookies.decode('utf-8')
|
||||
cookie_value = re.search(
|
||||
r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
|
||||
if cookie_value:
|
||||
value, domain = cookie_value.groups()
|
||||
self._set_cookie(domain, cookie, value)
|
||||
break
|
||||
|
||||
def get_testcases(self, include_onlymatching=False):
|
||||
t = getattr(self, '_TEST', None)
|
||||
if t:
|
||||
|
118
youtube_dl/extractor/contv.py
Normal file
118
youtube_dl/extractor/contv.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CONtvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
|
||||
'info_dict': {
|
||||
'id': 'CEG10022949',
|
||||
'ext': 'mp4',
|
||||
'title': 'Days Of Thrills & Laughter',
|
||||
'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
|
||||
'upload_date': '20180703',
|
||||
'timestamp': 1530634789.61,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
|
||||
'info_dict': {
|
||||
'id': 'CLIP-show_fotld_bts',
|
||||
'title': 'Fight of the Living Dead: Behind the Scenes Bites',
|
||||
},
|
||||
'playlist_mincount': 7,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
details = self._download_json(
|
||||
'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
|
||||
video_id, query={'device': 'web'})
|
||||
|
||||
if details.get('type') == 'episodic':
|
||||
seasons = self._download_json(
|
||||
'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
|
||||
video_id)
|
||||
entries = []
|
||||
for season in seasons:
|
||||
for episode in season.get('episodes', []):
|
||||
episode_id = episode.get('id')
|
||||
if not episode_id:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'https://www.contv.com/details-movie/' + episode_id,
|
||||
CONtvIE.ie_key(), episode_id))
|
||||
return self.playlist_result(entries, video_id, details.get('title'))
|
||||
|
||||
m_details = details['details']
|
||||
title = details['title']
|
||||
|
||||
formats = []
|
||||
|
||||
media_hls_url = m_details.get('media_hls_url')
|
||||
if media_hls_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
media_hls_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
media_mp4_url = m_details.get('media_mp4_url')
|
||||
if media_mp4_url:
|
||||
formats.append({
|
||||
'format_id': 'http',
|
||||
'url': media_mp4_url,
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
captions = m_details.get('captions') or {}
|
||||
for caption_url in captions.values():
|
||||
subtitles.setdefault('en', []).append({
|
||||
'url': caption_url
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in m_details.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
description = None
|
||||
for p in ('large_', 'medium_', 'small_', ''):
|
||||
d = m_details.get(p + 'description')
|
||||
if d:
|
||||
description = d
|
||||
break
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': description,
|
||||
'timestamp': float_or_none(details.get('metax_added_on'), 1000),
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(m_details.get('duration'), 1000),
|
||||
'view_count': int_or_none(details.get('num_watched')),
|
||||
'like_count': int_or_none(details.get('num_fav')),
|
||||
'categories': details.get('category'),
|
||||
'tags': details.get('tags'),
|
||||
'season_number': int_or_none(details.get('season')),
|
||||
'episode_number': int_or_none(details.get('episode')),
|
||||
'release_year': int_or_none(details.get('pub_year')),
|
||||
}
|
@@ -1,39 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CriterionIE(InfoExtractor):
    """Extractor for film pages on criterion.com.

    The media URL and the thumbnail URL are read from the
    ``so.addVariable(...)`` calls embedded in the film page; the title is
    obtained through ``_og_search_title`` and the description through the
    ``description`` meta tag.
    """

    _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
    _TEST = {
        'url': 'http://www.criterion.com/films/184-le-samourai',
        'md5': 'bc51beba55685509883a9a7830919ec3',
        'info_dict': {
            'id': '184',
            'ext': 'mp4',
            'title': 'Le Samouraï',
            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the film page at *url*.

        Raises the extractor's regex-not-found error when the page carries no
        ``videoURL`` variable, since nothing can be downloaded without it.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Mandatory: without the media URL there is nothing to download.
        final_url = self._search_regex(
            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
        title = self._og_search_title(webpage)
        description = self._html_search_meta('description', webpage)
        # Optional metadata: a missing thumbnail must not abort an otherwise
        # successful extraction, so this lookup is non-fatal (yields None).
        thumbnail = self._search_regex(
            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
            webpage, 'thumbnail url', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@@ -103,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
    """Initialization hook: performs the login step by delegating to ``_login``."""
    self._login()
|
||||
|
||||
def _download_webpage(self, url_or_request, *args, **kwargs):
    """Download a page with ``Accept-Language: *`` forced on the request.

    ``url_or_request`` may be a ready-made request object or a plain URL; a
    plain URL is wrapped with ``sanitized_Request`` first. All remaining
    arguments are forwarded unchanged to the parent implementation, whose
    result is returned.
    """
    if isinstance(url_or_request, compat_urllib_request.Request):
        request = url_or_request
    else:
        request = sanitized_Request(url_or_request)
    # Accept any language explicitly to avoid problems like
    # https://github.com/ytdl-org/youtube-dl/issues/6797.
    # Besides the IP address, Crunchyroll looks at Accept-Language to decide
    # whether georestriction applies (apparently it only takes the first
    # language, ignores priorities and requires it to correspond to the IP).
    # As a side effect, browsers that do not put the locale language first in
    # the header break in georestricted cases. Allowing any language appears
    # to work around the issue.
    request.add_header('Accept-Language', '*')
    parent = super(CrunchyrollBaseIE, self)
    return parent._download_webpage(request, *args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _add_skip_wall(url):
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
@@ -269,6 +256,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
'1080': ('80', '108'),
|
||||
}
|
||||
|
||||
def _download_webpage(self, url_or_request, *args, **kwargs):
    """Download a page with ``Accept-Language: *`` forced on the request.

    :param url_or_request: a request object, or a URL that is wrapped with
        ``sanitized_Request`` before the header is added.
    :param args: positional arguments forwarded to the parent implementation.
    :param kwargs: keyword arguments forwarded to the parent implementation.
    :return: whatever the parent ``_download_webpage`` returns.
    """
    request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
               else sanitized_Request(url_or_request))
    # Accept-Language must be set explicitly to accept any language to avoid issues
    # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
    # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
    # should be imposed or not (from what I can see it just takes the first language
    # ignoring the priority and requires it to correspond to the IP). By the way this causes
    # Crunchyroll to not work in georestriction cases in some browsers that don't place
    # the locale lang first in header. However allowing any language seems to work around the issue.
    request.add_header('Accept-Language', '*')
    # Name the class this method is defined in, so the lookup starts right
    # after CrunchyrollIE in the MRO and does not skip CrunchyrollBaseIE.
    return super(CrunchyrollIE, self)._download_webpage(request, *args, **kwargs)
|
||||
|
||||
def _decrypt_subtitles(self, data, iv, id):
|
||||
data = bytes_to_intlist(compat_b64decode(data))
|
||||
iv = bytes_to_intlist(compat_b64decode(iv))
|
||||
@@ -661,9 +661,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
webpage = self._download_webpage(
|
||||
self._add_skip_wall(url), show_id,
|
||||
headers=self.geo_verification_headers())
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
|
||||
webpage, 'title')
|
||||
title = self._html_search_meta('name', webpage, default=None)
|
||||
|
||||
episode_paths = re.findall(
|
||||
r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
|
||||
webpage)
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
@@ -14,8 +15,8 @@ class CtsNewsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '201501291578109',
|
||||
'ext': 'mp4',
|
||||
'title': '以色列.真主黨交火 3人死亡',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
|
||||
'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
|
||||
'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
|
||||
'timestamp': 1422528540,
|
||||
'upload_date': '20150129',
|
||||
}
|
||||
@@ -26,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '201309031304098',
|
||||
'ext': 'mp4',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩',
|
||||
'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
|
||||
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1378205880,
|
||||
@@ -62,8 +63,7 @@ class CtsNewsIE(InfoExtractor):
|
||||
video_url = mp4_feed['source_url']
|
||||
else:
|
||||
self.to_screen('Not CTSPlayer video, trying Youtube...')
|
||||
youtube_url = self._search_regex(
|
||||
r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
|
||||
youtube_url = YoutubeIE._extract_url(page)
|
||||
|
||||
return self.url_result(youtube_url, ie='Youtube')
|
||||
|
||||
|
@@ -48,7 +48,14 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
|
||||
class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
|
||||
_VALID_URL = r'''(?ix)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
|
||||
(?:www\.)?lequipe\.fr/video
|
||||
)
|
||||
/(?P<id>[^/?_]+)
|
||||
'''
|
||||
IE_NAME = 'dailymotion'
|
||||
|
||||
_FORMATS = [
|
||||
@@ -133,14 +140,26 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lequipe.fr/video/x791mem',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
urls = []
|
||||
# Look for embedded Dailymotion player
|
||||
matches = re.findall(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
|
||||
return list(map(lambda m: unescapeHTML(m[1]), matches))
|
||||
# https://developer.dailymotion.com/player#player-parameters
|
||||
for mobj in re.finditer(
|
||||
r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
|
||||
urls.append(unescapeHTML(mobj.group('url')))
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
|
||||
return urls
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -2,25 +2,21 @@
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
xpath_text,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class DaumIE(InfoExtractor):
|
||||
class DaumBaseIE(InfoExtractor):
    """Shared base for the Daum extractors; holds the Kakao embed URL prefix."""

    # URL prefix of the Kakao TV embed player; a clip id is appended to it.
    _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
|
||||
|
||||
|
||||
class DaumIE(DaumBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
|
||||
IE_NAME = 'daum.net'
|
||||
|
||||
@@ -36,6 +32,9 @@ class DaumIE(InfoExtractor):
|
||||
'duration': 2117,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader_id': 186139,
|
||||
'uploader': '콘간지',
|
||||
'timestamp': 1387310323,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/v/65139429',
|
||||
@@ -44,11 +43,14 @@ class DaumIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
|
||||
'description': 'md5:79794514261164ff27e36a21ad229fc5',
|
||||
'upload_date': '20150604',
|
||||
'upload_date': '20150118',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 154,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': 'MBC 예능',
|
||||
'uploader_id': 132251,
|
||||
'timestamp': 1421604228,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
|
||||
@@ -59,12 +61,15 @@ class DaumIE(InfoExtractor):
|
||||
'id': 'vwIpVpCQsT8$',
|
||||
'ext': 'flv',
|
||||
'title': '01-Korean War ( Trouble on the horizon )',
|
||||
'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
|
||||
'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
|
||||
'upload_date': '20080223',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 249,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': '까칠한 墮落始祖 황비홍님의',
|
||||
'uploader_id': 560824,
|
||||
'timestamp': 1203770745,
|
||||
},
|
||||
}, {
|
||||
# Requires dte_type=WEB (#9972)
|
||||
@@ -73,60 +78,24 @@ class DaumIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
|
||||
'ext': 'mp4',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
|
||||
'upload_date': '20160611',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'upload_date': '20170129',
|
||||
'uploader': '쇼! 음악중심',
|
||||
'uploader_id': 2653210,
|
||||
'timestamp': 1485684628,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = compat_urllib_parse_unquote(self._match_id(url))
|
||||
movie_data = self._download_json(
|
||||
'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
|
||||
video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
|
||||
|
||||
# For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
|
||||
if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
|
||||
return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
|
||||
|
||||
info = self._download_xml(
|
||||
'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
|
||||
'Downloading video info', query={'vid': video_id})
|
||||
|
||||
formats = []
|
||||
for format_el in movie_data['output_list']['output_list']:
|
||||
profile = format_el['profile']
|
||||
format_query = compat_urllib_parse_urlencode({
|
||||
'vid': video_id,
|
||||
'profile': profile,
|
||||
})
|
||||
url_doc = self._download_xml(
|
||||
'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
|
||||
video_id, note='Downloading video data for %s format' % profile)
|
||||
format_url = url_doc.find('result/url').text
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': profile,
|
||||
'width': int_or_none(format_el.get('width')),
|
||||
'height': int_or_none(format_el.get('height')),
|
||||
'filesize': int_or_none(format_el.get('filesize')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.find('TITLE').text,
|
||||
'formats': formats,
|
||||
'thumbnail': xpath_text(info, 'THUMB_URL'),
|
||||
'description': xpath_text(info, 'CONTENTS'),
|
||||
'duration': int_or_none(xpath_text(info, 'DURATION')),
|
||||
'upload_date': info.find('REGDTTM').text[:8],
|
||||
'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
|
||||
'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
|
||||
}
|
||||
if not video_id.isdigit():
|
||||
video_id += '@my'
|
||||
return self.url_result(
|
||||
self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
|
||||
|
||||
|
||||
class DaumClipIE(InfoExtractor):
|
||||
class DaumClipIE(DaumBaseIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
|
||||
IE_NAME = 'daum.net:clip'
|
||||
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
|
||||
@@ -142,6 +111,9 @@ class DaumClipIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'duration': 3868,
|
||||
'view_count': int,
|
||||
'uploader': 'GOMeXP',
|
||||
'uploader_id': 6667,
|
||||
'timestamp': 1377911092,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
|
||||
@@ -154,22 +126,8 @@ class DaumClipIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
clip_info = self._download_json(
|
||||
'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
|
||||
video_id, 'Downloading clip info')['clip_bean']
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
|
||||
'title': unescapeHTML(clip_info['title']),
|
||||
'thumbnail': clip_info.get('thumb_url'),
|
||||
'description': clip_info.get('contents'),
|
||||
'duration': int_or_none(clip_info.get('duration')),
|
||||
'upload_date': clip_info.get('up_date')[:8],
|
||||
'view_count': int_or_none(clip_info.get('play_count')),
|
||||
'ie_key': 'Daum',
|
||||
}
|
||||
return self.url_result(
|
||||
self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
|
||||
|
||||
|
||||
class DaumListIE(InfoExtractor):
|
||||
|
@@ -7,50 +7,51 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class DBTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
|
||||
_VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
|
||||
_TESTS = [{
|
||||
'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'md5': '2e24f67936517b143a234b4cadf792ec',
|
||||
'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
|
||||
'md5': 'b8f850ba1860adbda668d367f9b77699',
|
||||
'info_dict': {
|
||||
'id': '3649835190001',
|
||||
'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
|
||||
'id': 'PynxJnNWChE',
|
||||
'ext': 'mp4',
|
||||
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
|
||||
'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
|
||||
'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'timestamp': 1404039863,
|
||||
'upload_date': '20140629',
|
||||
'duration': 69.544,
|
||||
'uploader_id': '1027729757001',
|
||||
'upload_date': '20160916',
|
||||
'duration': 69,
|
||||
'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
|
||||
'uploader': 'Dagbladet',
|
||||
},
|
||||
'add_ie': ['BrightcoveNew']
|
||||
'add_ie': ['Youtube']
|
||||
}, {
|
||||
'url': 'http://dbtv.no/3649835190001',
|
||||
'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/vice/5000634109001',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://dbtv.no/filmtrailer/3359293614001',
|
||||
'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [url for _, url in re.findall(
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
|
||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
return {
|
||||
display_id, video_id = re.match(self._VALID_URL, url).groups()
|
||||
info = {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
if len(video_id) == 11:
|
||||
info.update({
|
||||
'url': video_id,
|
||||
'ie_key': 'Youtube',
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'url': 'jwplatform:' + video_id,
|
||||
'ie_key': 'JWPlatform',
|
||||
})
|
||||
return info
|
||||
|
@@ -5,23 +5,17 @@ import re
|
||||
import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<site>
|
||||
(?:(?:www|go)\.)?discovery|
|
||||
(?:www\.)?
|
||||
(?:
|
||||
discovery|
|
||||
investigationdiscovery|
|
||||
discoverylife|
|
||||
animalplanet|
|
||||
@@ -40,15 +34,15 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
cookingchanneltv|
|
||||
motortrend
|
||||
)
|
||||
)\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
|
||||
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
|
||||
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
|
||||
'info_dict': {
|
||||
'id': '5a2d9b4d6b66d17a5026e1fd',
|
||||
'id': '5a2f35ce6b66d17a5026e29e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dave Foley',
|
||||
'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
|
||||
'duration': 608,
|
||||
'title': 'Riding with Matthew Perry',
|
||||
'description': 'md5:a34333153e79bc4526019a5129e7f878',
|
||||
'duration': 84,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # requires ffmpeg
|
||||
@@ -56,20 +50,20 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# using `show_slug` is important to get the correct video data
|
||||
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_GEO_BYPASS = False
|
||||
_API_BASE_URL = 'https://api.discovery.com/v1/'
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
react_data = self._parse_json(self._search_regex(
|
||||
r'window\.__reactTransmitPacket\s*=\s*({.+?});',
|
||||
webpage, 'react data'), display_id)
|
||||
content_blocks = react_data['layout'][path]['contentBlocks']
|
||||
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
||||
video_id = video['id']
|
||||
site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
access_token = None
|
||||
cookies = self._get_cookies(url)
|
||||
@@ -79,27 +73,36 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
if auth_storage_cookie and auth_storage_cookie.value:
|
||||
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
||||
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
||||
video_id, fatal=False) or {}
|
||||
display_id, fatal=False) or {}
|
||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://%s.com/anonymous' % site, display_id, query={
|
||||
'https://%s.com/anonymous' % site, display_id,
|
||||
'Downloading token JSON metadata', query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'client_id': '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
headers = self.geo_verification_headers()
|
||||
headers['Authorization'] = 'Bearer ' + access_token
|
||||
|
||||
try:
|
||||
video = self._download_json(
|
||||
self._API_BASE_URL + 'content/videos',
|
||||
display_id, 'Downloading content JSON metadata',
|
||||
headers=headers, query={
|
||||
'embed': 'show.name',
|
||||
'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
|
||||
'slug': display_id,
|
||||
'show_slug': show_slug,
|
||||
})[0]
|
||||
video_id = video['id']
|
||||
stream = self._download_json(
|
||||
'https://api.discovery.com/v1/streaming/video/' + video_id,
|
||||
display_id, headers=headers)
|
||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
|
@@ -3,63 +3,38 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from .dplay import DPlayIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
|
||||
(?:
|
||||
.*\#(?P<id>\d+)|
|
||||
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
|
||||
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
|
||||
)'''
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
'info_dict': {
|
||||
'id': '3235167922001',
|
||||
'id': '78867',
|
||||
'ext': 'mp4',
|
||||
'title': 'Breaking Amish: Die Welt da draußen',
|
||||
'description': (
|
||||
'Vier Amische und eine Mennonitin wagen in New York'
|
||||
' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
|
||||
' ihrem spannenden Weg.'),
|
||||
'timestamp': 1396598084,
|
||||
'upload_date': '20140404',
|
||||
'uploader_id': '1659832546',
|
||||
'title': 'Die Welt da draußen',
|
||||
'description': 'md5:61033c12b73286e409d99a41742ef608',
|
||||
'timestamp': 1554069600,
|
||||
'upload_date': '20190331',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
|
||||
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.discovery.de/#5332316765001',
|
||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||
'only_matching': True,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
alternate_id = mobj.group('alternate_id')
|
||||
if alternate_id:
|
||||
self._initialize_geo_bypass({
|
||||
'countries': ['DE'],
|
||||
})
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (mobj.group('programme'), alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
|
||||
brightcove_id = mobj.group('id')
|
||||
if not brightcove_id:
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
|
||||
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
|
||||
brightcove_legacy_url).query)['@videoPlayer'][0]
|
||||
return self.url_result(smuggle_url(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
|
||||
'BrightcoveNew', brightcove_id)
|
||||
domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
|
||||
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
|
||||
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (programme, alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', realm, country)
|
||||
|
97
youtube_dl/extractor/dlive.py
Normal file
97
youtube_dl/extractor/dlive.py
Normal file
@@ -0,0 +1,97 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class DLiveVODIE(InfoExtractor):
    """Extractor for past broadcasts under ``dlive.tv/p/<uploader>+<id>``."""

    IE_NAME = 'dlive:vod'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
        'info_dict': {
            'id': '3mTzOl4WR',
            'ext': 'mp4',
            'title': 'Minecraft with james charles epic',
            'upload_date': '20190701',
            'timestamp': 1562011015,
            'uploader_id': 'pdp',
        }
    }, {
        'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the GraphQL endpoint for the broadcast and build its info dict."""
        mobj = re.match(self._VALID_URL, url)
        uploader_id = mobj.group('uploader_id')
        vod_id = mobj.group('id')

        # The permlink is the "<uploader>+<id>" pair exactly as found in the URL.
        graphql_query = '''query {
  pastBroadcast(permlink:"%s+%s") {
    content
    createdAt
    length
    playbackUrl
    title
    thumbnailUrl
    viewCount
  }
}''' % (uploader_id, vod_id)
        payload = json.dumps({'query': graphql_query}).encode()
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', vod_id, data=payload)
        broadcast = response['data']['pastBroadcast']

        title = broadcast['title']
        formats = self._extract_m3u8_formats(
            broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': title,
            'uploader_id': uploader_id,
            'formats': formats,
            'description': broadcast.get('content'),
            'thumbnail': broadcast.get('thumbnailUrl'),
            # createdAt is divided by 1000 before being used as the timestamp
            'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
            'view_count': int_or_none(broadcast.get('viewCount')),
        }
        return info
|
||||
|
||||
|
||||
class DLiveStreamIE(InfoExtractor):
    """Extractor for channel pages under ``dlive.tv/<display name>``."""

    IE_NAME = 'dlive:stream'
    _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'

    def _real_extract(self, url):
        """Look the user up by display name and return the live stream info."""
        display_name = self._match_id(url)

        graphql_query = '''query {
  userByDisplayName(displayname:"%s") {
    livestream {
      content
      createdAt
      title
      thumbnailUrl
      watchingCount
    }
    username
  }
}''' % display_name
        payload = json.dumps({'query': graphql_query}).encode()
        response = self._download_json(
            'https://graphigo.prd.dlive.tv/', display_name, data=payload)
        user = response['data']['userByDisplayName']

        livestream = user['livestream']
        title = livestream['title']
        username = user['username']

        # The HLS playlist URL is built from the username, not the display name.
        playlist_url = 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username
        formats = self._extract_m3u8_formats(playlist_url, display_name, 'mp4')
        self._sort_formats(formats)

        info = {
            'id': display_name,
            'title': self._live_title(title),
            'uploader': display_name,
            'uploader_id': username,
            'formats': formats,
            'description': livestream.get('content'),
            'thumbnail': livestream.get('thumbnailUrl'),
            'is_live': True,
            # createdAt is divided by 1000 before being used as the timestamp
            'timestamp': int_or_none(livestream.get('createdAt'), 1000),
            'view_count': int_or_none(livestream.get('watchingCount')),
        }
        return info
|
@@ -1,74 +1,68 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
|
||||
class DPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?P<domain>
|
||||
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
|
||||
(?P<subdomain_country>es|it)\.dplay\.com
|
||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
||||
|
||||
_TESTS = [{
|
||||
# non geo restricted, via secure api, unsigned download hls URL
|
||||
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
|
||||
'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||
'info_dict': {
|
||||
'id': '3172',
|
||||
'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
|
||||
'id': '13628',
|
||||
'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||
'ext': 'mp4',
|
||||
'title': 'Svensken lär sig njuta av livet',
|
||||
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
|
||||
'duration': 2650,
|
||||
'timestamp': 1365454320,
|
||||
'duration': 2649.856,
|
||||
'timestamp': 1365453720,
|
||||
'upload_date': '20130408',
|
||||
'creator': 'Kanal 5 (Home)',
|
||||
'creator': 'Kanal 5',
|
||||
'series': 'Nugammalt - 77 händelser som format Sverige',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# geo restricted, via secure api, unsigned download hls URL
|
||||
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
|
||||
'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||
'info_dict': {
|
||||
'id': '70816',
|
||||
'display_id': 'mig-og-min-mor/season-6-episode-12',
|
||||
'id': '104465',
|
||||
'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 12',
|
||||
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
|
||||
'duration': 2563,
|
||||
'timestamp': 1429696800,
|
||||
'upload_date': '20150422',
|
||||
'creator': 'Kanal 4 (Home)',
|
||||
'series': 'Mig og min mor',
|
||||
'season_number': 6,
|
||||
'episode_number': 12,
|
||||
'age_limit': 0,
|
||||
'title': 'Ted Bundy: Mind Of A Monster',
|
||||
'description': 'md5:8b780f6f18de4dae631668b8a9637995',
|
||||
'duration': 5290.027,
|
||||
'timestamp': 1570694400,
|
||||
'upload_date': '20191010',
|
||||
'creator': 'ID - Investigation Discovery',
|
||||
'series': 'Ted Bundy: Mind Of A Monster',
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# geo restricted, via direct unsigned hls URL
|
||||
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# disco-api
|
||||
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||
@@ -89,19 +83,59 @@ class DPlayIE(InfoExtractor):
|
||||
'format': 'bestvideo',
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Available for Premium users',
|
||||
}, {
|
||||
|
||||
'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
|
||||
'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
|
||||
'md5': '2b808ffb00fc47b884a172ca5d13053c',
|
||||
'info_dict': {
|
||||
'id': '6918',
|
||||
'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
|
||||
'ext': 'mp4',
|
||||
'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
|
||||
'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'upload_date': '20160524',
|
||||
'timestamp': 1464076800,
|
||||
'series': 'Biografie imbarazzanti',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
|
||||
'info_dict': {
|
||||
'id': '21652',
|
||||
'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episodio 1',
|
||||
'description': 'md5:b9dcff2071086e003737485210675f69',
|
||||
'thumbnail': r're:^https?://.*\.png',
|
||||
'upload_date': '20180709',
|
||||
'timestamp': 1531173540,
|
||||
'series': 'La fiebre del oro',
|
||||
'season_number': 8,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
|
||||
'url': 'https://www.dplay.jp/video/gold-rush/24086',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm):
|
||||
disco_base = 'https://' + disco_host
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||
geo_countries = [country.upper()]
|
||||
self._initialize_geo_bypass({
|
||||
'countries': geo_countries,
|
||||
})
|
||||
disco_base = 'https://%s/' % disco_host
|
||||
token = self._download_json(
|
||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||
disco_base + 'token', display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': realm,
|
||||
})['data']['attributes']['token']
|
||||
@@ -110,17 +144,30 @@ class DPlayIE(InfoExtractor):
|
||||
'Authorization': 'Bearer ' + token,
|
||||
}
|
||||
video = self._download_json(
|
||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||
disco_base + 'content/videos/' + display_id, display_id,
|
||||
headers=headers, query={
|
||||
'include': 'show'
|
||||
'include': 'images,primaryChannel,show,tags'
|
||||
})
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name']
|
||||
title = info['name'].strip()
|
||||
formats = []
|
||||
for format_id, format_dict in self._download_json(
|
||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||
display_id, headers=headers)['data']['attributes']['streaming'].items():
|
||||
try:
|
||||
streaming = self._download_json(
|
||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||
display_id, headers=headers)['data']['attributes']['streaming']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
error = info['errors'][0]
|
||||
error_code = error.get('code')
|
||||
if error_code == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(countries=geo_countries)
|
||||
elif error_code == 'access.denied.missingpackage':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
raise
|
||||
for format_id, format_dict in streaming.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
@@ -142,235 +189,55 @@ class DPlayIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = None
|
||||
try:
|
||||
included = video.get('included')
|
||||
if isinstance(included, list):
|
||||
show = next(e for e in included if e.get('type') == 'show')
|
||||
series = try_get(
|
||||
show, lambda x: x['attributes']['name'], compat_str)
|
||||
except StopIteration:
|
||||
pass
|
||||
creator = series = None
|
||||
tags = []
|
||||
thumbnails = []
|
||||
included = video.get('included') or []
|
||||
if isinstance(included, list):
|
||||
for e in included:
|
||||
attributes = e.get('attributes')
|
||||
if not attributes:
|
||||
continue
|
||||
e_type = e.get('type')
|
||||
if e_type == 'channel':
|
||||
creator = attributes.get('name')
|
||||
elif e_type == 'image':
|
||||
src = attributes.get('src')
|
||||
if src:
|
||||
thumbnails.append({
|
||||
'url': src,
|
||||
'width': int_or_none(attributes.get('width')),
|
||||
'height': int_or_none(attributes.get('height')),
|
||||
})
|
||||
if e_type == 'show':
|
||||
series = attributes.get('name')
|
||||
elif e_type == 'tag':
|
||||
name = attributes.get('name')
|
||||
if name:
|
||||
tags.append(name)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': float_or_none(
|
||||
info.get('videoDuration'), scale=1000),
|
||||
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(info.get('seasonNumber')),
|
||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'creator': creator,
|
||||
'tags': tags,
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
self._initialize_geo_bypass({
|
||||
'countries': [mobj.group('country').upper()],
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
|
||||
|
||||
if not video_id:
|
||||
host = mobj.group('host')
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'disco-api.' + host, host.replace('.', ''))
|
||||
|
||||
info = self._download_json(
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
video_id)['data'][0]
|
||||
|
||||
title = info['title']
|
||||
|
||||
PROTOCOLS = ('hls', 'hds')
|
||||
formats = []
|
||||
|
||||
def extract_formats(protocol, manifest_url):
|
||||
if protocol == 'hls':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
manifest_url, video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
|
||||
# Sometimes final URLs inside m3u8 are unsigned, let's fix this
|
||||
# ourselves. Also fragments' URLs are only served signed for
|
||||
# Safari user agent.
|
||||
query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
|
||||
for m3u8_format in m3u8_formats:
|
||||
m3u8_format.update({
|
||||
'url': update_url_query(m3u8_format['url'], query),
|
||||
'http_headers': {
|
||||
'User-Agent': USER_AGENTS['Safari'],
|
||||
},
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
elif protocol == 'hds':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
|
||||
video_id, f4m_id=protocol, fatal=False))
|
||||
|
||||
domain_tld = domain.split('.')[-1]
|
||||
if domain_tld in ('se', 'dk', 'no'):
|
||||
for protocol in PROTOCOLS:
|
||||
# Providing dsc-geo allows to bypass geo restriction in some cases
|
||||
self._set_cookie(
|
||||
'secure.dplay.%s' % domain_tld, 'dsc-geo',
|
||||
json.dumps({
|
||||
'countryCode': domain_tld.upper(),
|
||||
'expiry': (time.time() + 20 * 60) * 1000,
|
||||
}))
|
||||
stream = self._download_json(
|
||||
'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
|
||||
% (domain_tld, video_id, protocol), video_id,
|
||||
'Downloading %s stream JSON' % protocol, fatal=False)
|
||||
if stream and stream.get(protocol):
|
||||
extract_formats(protocol, stream[protocol])
|
||||
|
||||
# The last resort is to try direct unsigned hls/hds URLs from info dictionary.
|
||||
# Sometimes this does work even when secure API with dsc-geo has failed (e.g.
|
||||
# http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
|
||||
if not formats:
|
||||
for protocol in PROTOCOLS:
|
||||
if info.get(protocol):
|
||||
extract_formats(protocol, info[protocol])
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
for lang in ('se', 'sv', 'da', 'nl', 'no'):
|
||||
for format_id in ('web_vtt', 'vtt', 'srt'):
|
||||
subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
|
||||
if subtitle_url:
|
||||
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('video_metadata_longDescription'),
|
||||
'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
|
||||
'timestamp': int_or_none(info.get('video_publish_date')),
|
||||
'creator': info.get('video_metadata_homeChannel'),
|
||||
'series': info.get('video_metadata_show'),
|
||||
'season_number': int_or_none(info.get('season')),
|
||||
'episode_number': int_or_none(info.get('episode')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class DPlayItIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['IT']
|
||||
_TEST = {
|
||||
'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
|
||||
'md5': '2b808ffb00fc47b884a172ca5d13053c',
|
||||
'info_dict': {
|
||||
'id': '6918',
|
||||
'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
|
||||
'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
'upload_date': '20160524',
|
||||
'series': 'Biografie imbarazzanti',
|
||||
'season_number': 1,
|
||||
'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
|
||||
'episode_number': 1,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = remove_end(self._og_search_title(webpage), ' | Dplay')
|
||||
|
||||
video_id = None
|
||||
|
||||
info = self._search_regex(
|
||||
r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
|
||||
webpage, 'playback JSON', default=None)
|
||||
if info:
|
||||
for _ in range(2):
|
||||
info = self._parse_json(info, display_id, fatal=False)
|
||||
if not info:
|
||||
break
|
||||
else:
|
||||
video_id = try_get(info, lambda x: x['data']['id'])
|
||||
|
||||
if not info:
|
||||
info_url = self._search_regex(
|
||||
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
|
||||
webpage, 'info url', group='url')
|
||||
|
||||
info_url = urljoin(url, info_url)
|
||||
video_id = info_url.rpartition('/')[-1]
|
||||
|
||||
try:
|
||||
info = self._download_json(
|
||||
info_url, display_id, headers={
|
||||
'Authorization': 'Bearer %s' % self._get_cookies(url).get(
|
||||
'dplayit_token').value,
|
||||
'Referer': url,
|
||||
})
|
||||
if isinstance(info, compat_str):
|
||||
info = self._parse_json(info, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
error = info['errors'][0]
|
||||
if error.get('code') == 'access.denied.geoblocked':
|
||||
self.raise_geo_restricted(
|
||||
msg=error.get('detail'), countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||
raise
|
||||
|
||||
hls_url = info['data']['attributes']['streaming']['hls']['url']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
|
||||
webpage, 'series', fatal=False)
|
||||
episode = self._search_regex(
|
||||
r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
|
||||
webpage, 'episode', fatal=False)
|
||||
|
||||
mobj = re.search(
|
||||
r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
|
||||
webpage)
|
||||
if mobj:
|
||||
season_number = int(mobj.group('season_number'))
|
||||
episode_number = int(mobj.group('episode_number'))
|
||||
upload_date = unified_strdate(mobj.group('upload_date'))
|
||||
else:
|
||||
season_number = episode_number = upload_date = None
|
||||
|
||||
return {
|
||||
'id': compat_str(video_id or display_id),
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'series': series,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
domain = mobj.group('domain').lstrip('www.')
|
||||
country = mobj.group('country') or mobj.group('subdomain_country')
|
||||
host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, host, 'dplay' + country, country)
|
||||
|
@@ -24,7 +24,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['DK']
|
||||
IE_NAME = 'drtv'
|
||||
@@ -80,6 +80,9 @@ class DRTVIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -1,20 +1,17 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
|
||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||
'info_dict': {
|
||||
'id': '6646981/951bc60f',
|
||||
@@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
|
||||
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
protocol = mobj.group('protocol')
|
||||
|
||||
url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
|
||||
req = sanitized_Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1; cpc=10')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
files_base64 = self._search_regex(
|
||||
r'data-files="([^"]+)"', webpage, 'data files')
|
||||
|
||||
files = self._parse_json(
|
||||
compat_b64decode(files_base64).decode('utf-8'),
|
||||
video_id)
|
||||
video_id = self._match_id(url).replace('_', '/')
|
||||
item = self._download_json(
|
||||
'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
|
||||
video_id)['items'][0]
|
||||
title = item['title']
|
||||
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
} for format_id, video_url in files.items() if format_id != 'still']
|
||||
formats = []
|
||||
for variant in media.get('variants', []):
|
||||
uri = variant.get('uri')
|
||||
if not uri:
|
||||
continue
|
||||
version = variant.get('version')
|
||||
formats.append({
|
||||
'url': uri,
|
||||
'format_id': version,
|
||||
'quality': quality(version),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage) or self._og_search_title(webpage)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage) or self._og_search_description(webpage)
|
||||
thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
|
||||
thumbnails = []
|
||||
stills = item.get('stills') or {}
|
||||
for t in ('thumb', 'still'):
|
||||
for s in ('', '-medium', '-large'):
|
||||
still_id = t + s
|
||||
still_url = stills.get(still_id)
|
||||
if not still_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'id': still_id,
|
||||
'url': still_url,
|
||||
})
|
||||
|
||||
stats = item.get('stats') or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
'description': item.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'like_count': int_or_none(stats.get('kudos_total')),
|
||||
'view_count': int_or_none(stats.get('views_total')),
|
||||
}
|
||||
|
@@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@@ -18,7 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class EinthusanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://einthusan.tv/movie/watch/9097/',
|
||||
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
|
||||
@@ -32,6 +33,12 @@ class EinthusanIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.com/movie/watch/9097/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
|
||||
@@ -41,7 +48,9 @@ class EinthusanIE(InfoExtractor):
|
||||
)).decode('utf-8'), video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
@@ -53,7 +62,7 @@ class EinthusanIE(InfoExtractor):
|
||||
page_id = self._html_search_regex(
|
||||
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
|
||||
video_data = self._download_json(
|
||||
'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
|
||||
'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
|
||||
data=urlencode_postdata({
|
||||
'xEvent': 'UIVideoPlayer.PingOutcome',
|
||||
'xJson': json.dumps({
|
||||
|
@@ -216,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor):
|
||||
_TEST = {
|
||||
'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
|
||||
'info_dict': {
|
||||
'id': '21846851',
|
||||
'ext': 'mp4',
|
||||
'id': '56032156',
|
||||
'ext': 'flv',
|
||||
'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
|
||||
'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
|
||||
'timestamp': 1513960621,
|
||||
'upload_date': '20171222',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m manifest'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -234,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](?P<id>\d+)',
|
||||
webpage, 'video id', group='id')
|
||||
embed_url = self._search_regex(
|
||||
r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
|
||||
webpage, 'embed url')
|
||||
|
||||
return self.url_result(
|
||||
'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
|
||||
return self.url_result(embed_url, 'AbcNewsVideo')
|
||||
|
@@ -58,17 +58,8 @@ from .ard import (
|
||||
ARDMediathekIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTvIE,
|
||||
ArteTVPlus7IE,
|
||||
ArteTVCreativeIE,
|
||||
ArteTVConcertIE,
|
||||
ArteTVInfoIE,
|
||||
ArteTVFutureIE,
|
||||
ArteTVCinemaIE,
|
||||
ArteTVDDCIE,
|
||||
ArteTVMagazineIE,
|
||||
ArteTVEmbedIE,
|
||||
TheOperaPlatformIE,
|
||||
ArteTVPlaylistIE,
|
||||
)
|
||||
from .asiancrush import (
|
||||
@@ -89,7 +80,6 @@ from .awaan import (
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
from .bbc import (
|
||||
BBCCoUkIE,
|
||||
@@ -113,6 +103,8 @@ from .bild import BildIE
|
||||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
BilibiliAudioIE,
|
||||
BilibiliAudioAlbumIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
@@ -173,6 +165,7 @@ from .cbs import CBSIE
|
||||
from .cbslocal import CBSLocalIE
|
||||
from .cbsinteractive import CBSInteractiveIE
|
||||
from .cbsnews import (
|
||||
CBSNewsEmbedIE,
|
||||
CBSNewsIE,
|
||||
CBSNewsLiveVideoIE,
|
||||
)
|
||||
@@ -237,10 +230,10 @@ from .commonprotocols import (
|
||||
RtmpIE,
|
||||
)
|
||||
from .condenast import CondeNastIE
|
||||
from .contv import CONtvIE
|
||||
from .corus import CorusIE
|
||||
from .cracked import CrackedIE
|
||||
from .crackle import CrackleIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollIE,
|
||||
@@ -283,10 +276,7 @@ from .douyutv import (
|
||||
DouyuShowIE,
|
||||
DouyuTVIE,
|
||||
)
|
||||
from .dplay import (
|
||||
DPlayIE,
|
||||
DPlayItIE,
|
||||
)
|
||||
from .dplay import DPlayIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
@@ -365,7 +355,6 @@ from .firsttv import FirstTVIE
|
||||
from .fivemin import FiveMinIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .flipagram import FlipagramIE
|
||||
from .folketinget import FolketingetIE
|
||||
from .footyroom import FootyRoomIE
|
||||
from .formula1 import Formula1IE
|
||||
@@ -376,7 +365,10 @@ from .fourtube import (
|
||||
FuxIE,
|
||||
)
|
||||
from .fox import FOXIE
|
||||
from .fox9 import FOX9IE
|
||||
from .fox9 import (
|
||||
FOX9IE,
|
||||
FOX9NewsIE,
|
||||
)
|
||||
from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
@@ -404,19 +396,11 @@ from .frontendmasters import (
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .funimation import FunimationIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
FunkChannelIE,
|
||||
)
|
||||
from .funnyordie import FunnyOrDieIE
|
||||
from .funk import FunkIE
|
||||
from .fusion import FusionIE
|
||||
from .fxnetworks import FXNetworksIE
|
||||
from .gaia import GaiaIE
|
||||
from .gameinformer import GameInformerIE
|
||||
from .gameone import (
|
||||
GameOneIE,
|
||||
GameOnePlaylistIE,
|
||||
)
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gaskrank import GaskrankIE
|
||||
@@ -432,7 +416,6 @@ from .globo import (
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .go import GoIE
|
||||
from .go90 import Go90IE
|
||||
from .godtube import GodTubeIE
|
||||
from .golem import GolemIE
|
||||
from .googledrive import GoogleDriveIE
|
||||
@@ -441,7 +424,6 @@ from .googlesearch import GoogleSearchIE
|
||||
from .goshgay import GoshgayIE
|
||||
from .gputechconf import GPUTechConfIE
|
||||
from .groupon import GrouponIE
|
||||
from .hark import HarkIE
|
||||
from .hbo import HBOIE
|
||||
from .hearthisat import HearThisAtIE
|
||||
from .heise import HeiseIE
|
||||
@@ -473,7 +455,6 @@ from .hungama import (
|
||||
HungamaSongIE,
|
||||
)
|
||||
from .hypem import HypemIE
|
||||
from .iconosquare import IconosquareIE
|
||||
from .ign import (
|
||||
IGNIE,
|
||||
OneUPIE,
|
||||
@@ -533,7 +514,6 @@ from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
from .krasview import KrasViewIE
|
||||
@@ -559,7 +539,6 @@ from .lcp import (
|
||||
LcpPlayIE,
|
||||
LcpIE,
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioIE,
|
||||
@@ -592,6 +571,7 @@ from .linkedin import (
|
||||
)
|
||||
from .linuxacademy import LinuxAcademyIE
|
||||
from .litv import LiTVIE
|
||||
from .livejournal import LiveJournalIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
LiveLeakEmbedIE,
|
||||
@@ -610,13 +590,11 @@ from .lynda import (
|
||||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .macgamestore import MacGameStoreIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makertv import MakerTVIE
|
||||
from .malltv import MallTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
@@ -654,7 +632,7 @@ from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .minoto import MinotoIE
|
||||
from .miomio import MioMioIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mit import TechTVMITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import (
|
||||
MixcloudIE,
|
||||
@@ -682,7 +660,7 @@ from .mtv import (
|
||||
MTVVideoIE,
|
||||
MTVServicesEmbeddedIE,
|
||||
MTVDEIE,
|
||||
MTV81IE,
|
||||
MTVJapanIE,
|
||||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
@@ -745,7 +723,6 @@ from .nexx import (
|
||||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nhl import NHLIE
|
||||
@@ -772,13 +749,6 @@ from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
NowVideoIE,
|
||||
VideoWeedIE,
|
||||
WholeCloudIE,
|
||||
)
|
||||
from .nowness import (
|
||||
NownessIE,
|
||||
NownessPlaylistIE,
|
||||
@@ -896,12 +866,12 @@ from .polskieradio import (
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
from .pornhub import (
|
||||
PornHubIE,
|
||||
PornHubPlaylistIE,
|
||||
PornHubUserVideosIE,
|
||||
PornHubUserIE,
|
||||
PornHubPagedVideoListIE,
|
||||
PornHubUserVideosUploadIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
@@ -911,7 +881,6 @@ from .puhutv import (
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
@@ -946,7 +915,10 @@ from .raywenderlich import (
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
from .redbulltv import (
|
||||
RedBullTVIE,
|
||||
RedBullTVRrnContentIE,
|
||||
)
|
||||
from .reddit import (
|
||||
RedditIE,
|
||||
RedditRIE,
|
||||
@@ -985,7 +957,6 @@ from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
|
||||
from .rtvnh import RTVNHIE
|
||||
from .rtvs import RTVSIE
|
||||
from .rudo import RudoIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@@ -1009,10 +980,13 @@ from .sbs import SBSIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .scrippsnetworks import ScrippsNetworksWatchIE
|
||||
from .scte import (
|
||||
SCTEIE,
|
||||
SCTECourseIE,
|
||||
)
|
||||
from .seeker import SeekerIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .servus import ServusIE
|
||||
from .sevenplus import SevenPlusIE
|
||||
from .sexu import SexuIE
|
||||
@@ -1053,6 +1027,7 @@ from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sonyliv import SonyLIVIE
|
||||
from .soundcloud import (
|
||||
SoundcloudEmbedIE,
|
||||
SoundcloudIE,
|
||||
SoundcloudSetIE,
|
||||
SoundcloudUserIE,
|
||||
@@ -1145,12 +1120,14 @@ from .telegraaf import TelegraafIE
|
||||
from .telemb import TeleMBIE
|
||||
from .telequebec import (
|
||||
TeleQuebecIE,
|
||||
TeleQuebecSquatIE,
|
||||
TeleQuebecEmissionIE,
|
||||
TeleQuebecLiveIE,
|
||||
)
|
||||
from .teletask import TeleTaskIE
|
||||
from .telewebion import TelewebionIE
|
||||
from .tennistv import TennisTVIE
|
||||
from .tenplay import TenPlayIE
|
||||
from .testurl import TestURLIE
|
||||
from .tf1 import TF1IE
|
||||
from .tfo import TFOIE
|
||||
@@ -1203,11 +1180,11 @@ from .tunein import (
|
||||
)
|
||||
from .tunepk import TunePkIE
|
||||
from .turbo import TurboIE
|
||||
from .tutv import TutvIE
|
||||
from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv2dk import TV2DKIE
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
@@ -1273,6 +1250,10 @@ from .udn import UDNEmbedIE
|
||||
from .ufctv import UFCTVIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .digiteka import DigitekaIE
|
||||
from .dlive import (
|
||||
DLiveVODIE,
|
||||
DLiveStreamIE,
|
||||
)
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
@@ -1294,7 +1275,6 @@ from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import (
|
||||
VevoIE,
|
||||
@@ -1336,7 +1316,6 @@ from .viewlift import (
|
||||
ViewLiftIE,
|
||||
ViewLiftEmbedIE,
|
||||
)
|
||||
from .viewster import ViewsterIE
|
||||
from .viidea import ViideaIE
|
||||
from .vimeo import (
|
||||
VimeoIE,
|
||||
@@ -1425,7 +1404,6 @@ from .weibo import (
|
||||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wimp import WimpIE
|
||||
from .wistia import WistiaIE
|
||||
from .worldstarhiphop import WorldStarHipHopIE
|
||||
from .wsj import (
|
||||
@@ -1439,6 +1417,7 @@ from .xfileshare import XFileShareIE
|
||||
from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
XHamsterUserIE,
|
||||
)
|
||||
from .xiami import (
|
||||
XiamiSongIE,
|
||||
@@ -1462,6 +1441,7 @@ from .yahoo import (
|
||||
YahooSearchIE,
|
||||
YahooGyaOPlayerIE,
|
||||
YahooGyaOIE,
|
||||
YahooJapanNewsIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
from .yandexmusic import (
|
||||
|
@@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor):
|
||||
if not video_data:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
subtitles = {}
|
||||
formats = []
|
||||
for f in video_data:
|
||||
format_id = f['stream_type']
|
||||
@@ -402,9 +403,17 @@ class FacebookIE(InfoExtractor):
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||
subtitles_src = f[0].get('subtitles_src')
|
||||
if subtitles_src:
|
||||
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
||||
if not formats:
|
||||
raise ExtractorError('Cannot find video formats')
|
||||
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
@@ -428,7 +437,7 @@ class FacebookIE(InfoExtractor):
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
@@ -442,6 +451,7 @@ class FacebookIE(InfoExtractor):
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@@ -9,7 +9,7 @@ from ..utils import int_or_none
|
||||
|
||||
class FiveTVIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
http://
|
||||
https?://
|
||||
(?:www\.)?5-tv\.ru/
|
||||
(?:
|
||||
(?:[^/]+/)+(?P<id>\d+)|
|
||||
@@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor):
|
||||
'duration': 180,
|
||||
},
|
||||
}, {
|
||||
# redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
|
||||
'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
|
||||
'info_dict': {
|
||||
'id': 'glavnoe',
|
||||
@@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor):
|
||||
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'redirect to «Известия. Главное» project page',
|
||||
}, {
|
||||
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
|
||||
'only_matching': True,
|
||||
@@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self._search_regex(
|
||||
[r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
|
||||
[r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
|
||||
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
||||
webpage, 'video url')
|
||||
|
||||
|
@@ -1,115 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FlipagramIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://flipagram.com/f/nyvTSJMKId',
|
||||
'md5': '888dcf08b7ea671381f00fab74692755',
|
||||
'info_dict': {
|
||||
'id': 'nyvTSJMKId',
|
||||
'ext': 'mp4',
|
||||
'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
|
||||
'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
|
||||
'duration': 35.571,
|
||||
'timestamp': 1461244995,
|
||||
'upload_date': '20160421',
|
||||
'uploader': 'kitty juria',
|
||||
'uploader_id': 'sjuria101',
|
||||
'creator': 'kitty juria',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'comments': list,
|
||||
'formats': 'mincount:2',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
|
||||
video_id)
|
||||
|
||||
flipagram = video_data['flipagram']
|
||||
video = flipagram['video']
|
||||
|
||||
json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
title = json_ld.get('title') or flipagram['captionText']
|
||||
description = json_ld.get('description') or flipagram.get('captionText')
|
||||
|
||||
formats = [{
|
||||
'url': video['url'],
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': int_or_none(video_data.get('size')),
|
||||
}]
|
||||
|
||||
preview_url = try_get(
|
||||
flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
|
||||
if preview_url:
|
||||
formats.append({
|
||||
'url': preview_url,
|
||||
'ext': 'm4a',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
counts = flipagram.get('counts', {})
|
||||
user = flipagram.get('user', {})
|
||||
video_data = flipagram.get('video', {})
|
||||
|
||||
thumbnails = [{
|
||||
'url': self._proto_relative_url(cover['url']),
|
||||
'width': int_or_none(cover.get('width')),
|
||||
'height': int_or_none(cover.get('height')),
|
||||
'filesize': int_or_none(cover.get('size')),
|
||||
} for cover in flipagram.get('covers', []) if cover.get('url')]
|
||||
|
||||
# Note that this only retrieves comments that are initially loaded.
|
||||
# For videos with large amounts of comments, most won't be retrieved.
|
||||
comments = []
|
||||
for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
|
||||
text = comment.get('comment')
|
||||
if not text or not isinstance(text, list):
|
||||
continue
|
||||
comments.append({
|
||||
'author': comment.get('user', {}).get('name'),
|
||||
'author_id': comment.get('user', {}).get('username'),
|
||||
'id': comment.get('id'),
|
||||
'text': text[0],
|
||||
'timestamp': unified_timestamp(comment.get('created')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': float_or_none(flipagram.get('duration'), 1000),
|
||||
'thumbnails': thumbnails,
|
||||
'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
|
||||
'uploader': user.get('name'),
|
||||
'uploader_id': user.get('username'),
|
||||
'creator': user.get('name'),
|
||||
'view_count': int_or_none(counts.get('plays')),
|
||||
'like_count': int_or_none(counts.get('likes')),
|
||||
'repost_count': int_or_none(counts.get('reflips')),
|
||||
'comment_count': int_or_none(counts.get('comments')),
|
||||
'comments': comments,
|
||||
'formats': formats,
|
||||
}
|
@@ -1,13 +1,23 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .anvato import AnvatoIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class FOX9IE(AnvatoIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.fox9.com/news/215123287-story',
|
||||
class FOX9IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
|
||||
'Anvato', video_id)
|
||||
|
||||
|
||||
class FOX9NewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
|
||||
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
|
||||
'info_dict': {
|
||||
'id': '314473',
|
||||
@@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE):
|
||||
'categories': ['News', 'Sports'],
|
||||
'tags': ['news', 'video'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.fox9.com/news/investigators/214070684-story',
|
||||
'only_matching': True,
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._parse_json(
|
||||
self._search_regex(
|
||||
r"this\.videosJson\s*=\s*'(\[.+?\])';",
|
||||
webpage, 'anvato playlist'),
|
||||
video_id)[0]['video']
|
||||
|
||||
return self._get_anvato_videos(
|
||||
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
|
||||
video_id)
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
anvato_id = self._search_regex(
|
||||
r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
|
||||
return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
|
||||
|
@@ -1,89 +1,21 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
|
||||
}
|
||||
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
|
||||
|
||||
@staticmethod
|
||||
def _make_headers(referer):
|
||||
headers = FunkBaseIE._HEADERS.copy()
|
||||
headers['Referer'] = referer
|
||||
return headers
|
||||
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:%s' % video['sourceId'],
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': video['sourceId'],
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'season_number': int_or_none(video.get('seasonNr')),
|
||||
'episode_number': int_or_none(video.get('episodeNr')),
|
||||
}
|
||||
|
||||
|
||||
class FunkMixIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
|
||||
'md5': '8edf617c2f2b7c9847dfda313f199009',
|
||||
'info_dict': {
|
||||
'id': '123748',
|
||||
'ext': 'mp4',
|
||||
'title': '"Die realste Kifferdoku aller Zeiten"',
|
||||
'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
|
||||
'timestamp': 1490274721,
|
||||
'upload_date': '20170323',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
mix_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers=self._make_headers(url), query={
|
||||
'size': 100,
|
||||
})['_embedded']['curatedListList']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas
|
||||
if try_get(
|
||||
meta, lambda x: x['videoDataDelegate']['alias'],
|
||||
compat_str) == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
|
||||
class FunkChannelIE(FunkBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
@@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE):
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
|
||||
}, {
|
||||
# only available via byIdList API
|
||||
'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
|
||||
'info_dict': {
|
||||
'id': '205067',
|
||||
'ext': 'mp4',
|
||||
'title': 'Martin Sonneborn erklärt die EU',
|
||||
'description': 'md5:050f74626e4ed87edf4626d2024210c0',
|
||||
'timestamp': 1494424042,
|
||||
'upload_date': '20170510',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
headers = self._make_headers(url)
|
||||
|
||||
video = None
|
||||
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
for page_num in itertools.count():
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
|
||||
headers=headers, query={
|
||||
'filterFsk': 'false',
|
||||
'sort': 'creationDate,desc',
|
||||
'size': 100,
|
||||
'page': page_num,
|
||||
}, fatal=False)
|
||||
if not by_channel_alias:
|
||||
break
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if not video_list:
|
||||
break
|
||||
try:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
break
|
||||
except StopIteration:
|
||||
pass
|
||||
if not try_get(
|
||||
by_channel_alias, lambda x: x['_links']['next']):
|
||||
break
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList',
|
||||
channel_id, 'Downloading byIdList JSON', headers=headers,
|
||||
query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
|
||||
if not video:
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter',
|
||||
channel_id, 'Downloading filter JSON', headers=headers, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
video = next(r for r in results if r.get('alias') == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
display_id, nexx_id = re.match(self._VALID_URL, url).groups()
|
||||
video = self._download_json(
|
||||
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:' + nexx_id,
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': nexx_id,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'channel_id': str_or_none(video.get('channelId')),
|
||||
'display_id': display_id,
|
||||
'tags': video.get('tags'),
|
||||
'thumbnail': video.get('imageUrlLandscape'),
|
||||
}
|
||||
|
@@ -1,162 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class FunnyOrDieIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
|
||||
'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9',
|
||||
'info_dict': {
|
||||
'id': '0732f586d7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heart-Shaped Box: Literal Video Version',
|
||||
'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
|
||||
'thumbnail': r're:^http:.*\.jpg$',
|
||||
'uploader': 'DASjr',
|
||||
'timestamp': 1317904928,
|
||||
'upload_date': '20111006',
|
||||
'duration': 318.3,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/embed/e402820827',
|
||||
'info_dict': {
|
||||
'id': 'e402820827',
|
||||
'ext': 'mp4',
|
||||
'title': 'Please Use This Song (Jon Lajoie)',
|
||||
'description': 'Please use this to sell something. www.jonlajoie.com',
|
||||
'thumbnail': r're:^http:.*\.jpg$',
|
||||
'timestamp': 1398988800,
|
||||
'upload_date': '20140502',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
|
||||
if not links:
|
||||
raise ExtractorError('No media links available for %s' % video_id)
|
||||
|
||||
links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1',
|
||||
webpage, 'm3u8 url', group='url')
|
||||
|
||||
formats = []
|
||||
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
source_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
||||
if source_formats:
|
||||
self._sort_formats(source_formats)
|
||||
|
||||
for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
|
||||
for path, ext in links:
|
||||
ff = f.copy()
|
||||
if ff:
|
||||
if ext != 'mp4':
|
||||
ff = dict(
|
||||
[(k, v) for k, v in ff.items()
|
||||
if k in ('height', 'width', 'format_id')])
|
||||
ff.update({
|
||||
'format_id': ff['format_id'].replace('hls', ext),
|
||||
'ext': ext,
|
||||
'protocol': 'http',
|
||||
})
|
||||
else:
|
||||
ff.update({
|
||||
'format_id': '%s-%d' % (ext, bitrate),
|
||||
'vbr': bitrate,
|
||||
})
|
||||
ff['url'] = self._proto_relative_url(
|
||||
'%s%d.%s' % (path, bitrate, ext))
|
||||
formats.append(ff)
|
||||
self._check_formats(formats, video_id)
|
||||
|
||||
formats.extend(m3u8_formats)
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
subtitles = {}
|
||||
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
|
||||
subtitles[src_lang] = [{
|
||||
'ext': src.split('/')[-1],
|
||||
'url': 'http://www.funnyordie.com%s' % src,
|
||||
}]
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp', default=None))
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
|
||||
webpage, 'uploader', default=None)
|
||||
|
||||
title, description, thumbnail, duration = [None] * 4
|
||||
|
||||
medium = self._parse_json(
|
||||
self._search_regex(
|
||||
r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
if medium:
|
||||
title = medium.get('title')
|
||||
duration = float_or_none(medium.get('duration'))
|
||||
if not timestamp:
|
||||
timestamp = unified_timestamp(medium.get('publishDate'))
|
||||
|
||||
post = self._parse_json(
|
||||
self._search_regex(
|
||||
r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
|
||||
default='{}'),
|
||||
video_id, fatal=False)
|
||||
if post:
|
||||
if not title:
|
||||
title = post.get('name')
|
||||
description = post.get('description')
|
||||
thumbnail = post.get('picture')
|
||||
|
||||
if not title:
|
||||
title = self._og_search_title(webpage)
|
||||
if not description:
|
||||
description = self._og_search_description(webpage)
|
||||
if not duration:
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
('video:duration', 'duration'), webpage, 'duration', default=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
@@ -1,35 +1,84 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .ooyala import OoyalaIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class FusionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
|
||||
'info_dict': {
|
||||
'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
|
||||
'id': '3145868',
|
||||
'ext': 'mp4',
|
||||
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
|
||||
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
|
||||
'duration': 140.0,
|
||||
'timestamp': 1442589635,
|
||||
'uploader': 'UNIVISON',
|
||||
'upload_date': '20150918',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['Ooyala'],
|
||||
'add_ie': ['Anvato'],
|
||||
}, {
|
||||
'url': 'http://fusion.tv/video/201781',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._match_id(url)
|
||||
video = self._download_json(
|
||||
'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)
|
||||
|
||||
ooyala_code = self._search_regex(
|
||||
r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
|
||||
webpage, 'ooyala code', group='code')
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': video['title'],
|
||||
'description': video.get('excerpt'),
|
||||
'timestamp': parse_iso8601(video.get('published')),
|
||||
'series': video.get('show'),
|
||||
}
|
||||
|
||||
return OoyalaIE._build_url_result(ooyala_code)
|
||||
formats = []
|
||||
src = video.get('src') or {}
|
||||
for f_id, f in src.items():
|
||||
for q_id, q in f.items():
|
||||
q_url = q.get('url')
|
||||
if not q_url:
|
||||
continue
|
||||
ext = determine_ext(q_url, mimetype2ext(q.get('type')))
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
|
||||
elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': '-'.join([f_id, q_id]),
|
||||
'url': q_url,
|
||||
'width': int_or_none(q.get('width')),
|
||||
'height': int_or_none(q.get('height')),
|
||||
'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
})
|
||||
if formats:
|
||||
self._sort_formats(formats)
|
||||
info['formats'] = formats
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': 'anvato:uni:' + video['video_ids']['anvato'],
|
||||
'ie_key': 'Anvato',
|
||||
})
|
||||
|
||||
return info
|
||||
|
@@ -1,12 +1,19 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
)
|
||||
|
||||
|
||||
class GameInformerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
|
||||
_TESTS = [{
|
||||
# normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
|
||||
'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
|
||||
'md5': '292f26da1ab4beb4c9099f1304d2b071',
|
||||
'info_dict': {
|
||||
@@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor):
|
||||
'upload_date': '20150928',
|
||||
'uploader_id': '694940074001',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# Brightcove id inside unique element with field--name-field-brightcove-video-id class
|
||||
'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
|
||||
'info_dict': {
|
||||
'id': '6057111913001',
|
||||
'ext': 'mp4',
|
||||
'title': 'New Gameplay Today – Streets Of Rogue',
|
||||
'timestamp': 1562699001,
|
||||
'upload_date': '20190709',
|
||||
'uploader_id': '694940074001',
|
||||
|
||||
},
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, display_id, headers=self.geo_verification_headers())
|
||||
brightcove_id = self._search_regex(
|
||||
[r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
|
||||
webpage, 'brightcove id')
|
||||
return self.url_result(
|
||||
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
|
||||
brightcove_id)
|
||||
brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage))
|
||||
brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage)
|
||||
return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)
|
||||
|
@@ -1,134 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_with_ns,
|
||||
parse_iso8601,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
# Prefix -> namespace URI mapping handed to xpath_with_ns() when querying
# the "media:" elements of the MRSS feed.
NAMESPACE_MAP = dict(media='http://search.yahoo.com/mrss/')

# URL prefix to download the mp4 files directly instead of streaming via rtmp
# Credits go to XBox-Maniac
# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
|
||||
|
||||
|
||||
class GameOneIE(InfoExtractor):
    """Extractor for a single Game One episode page (gameone.de/tv/<id>)."""

    _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.gameone.de/tv/288',
            'md5': '136656b7fb4c9cb4a8e2d500651c499b',
            'info_dict': {
                'id': '288',
                'ext': 'mp4',
                'title': 'Game One - Folge 288',
                'duration': 1238,
                'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
                'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
                'age_limit': 16,
                'upload_date': '20140513',
                'timestamp': 1399980122,
            }
        },
        {
            'url': 'http://gameone.de/tv/220',
            'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
            'info_dict': {
                'id': '220',
                'ext': 'mp4',
                'upload_date': '20120918',
                'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
                'timestamp': 1347971451,
                'title': 'Game One - Folge 220',
                'duration': 896.62,
                'age_limit': 16,
            }
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for one episode.

        The episode page's og:video URL carries an ``mrss`` query parameter
        pointing at an MRSS feed.  The feed provides title, image and
        publication date, plus a ``media:content`` URL whose document lists
        the available renditions.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage, secure=False)
        description = self._html_search_meta('description', webpage)

        # The age label meta tag is optional.  Searching a missing (None)
        # value would raise a TypeError, so fall back to an empty string and
        # let the regex search return its '0' default instead.
        age_label = self._html_search_meta('age-de-meta-label', webpage) or ''
        age_limit = int(self._search_regex(
            r'age=(\d+)', age_label, 'age_limit', '0'))
        mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')

        mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
        title = mrss.find('.//item/title').text
        # The thumbnail is optional metadata: do not crash when the feed has
        # no <image> element.  Compare against None explicitly because an
        # Element without children evaluates as false.
        image = mrss.find('.//item/image')
        thumbnail = image.get('url') if image is not None else None
        timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')
        content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
        content_url = content.get('url')

        content = self._download_xml(
            content_url,
            video_id,
            'Downloading media:content')
        rendition_items = content.findall('.//rendition')
        # The duration is read from the first rendition; an empty rendition
        # list is reported by _sort_formats below rather than by an IndexError.
        duration = (
            float_or_none(rendition_items[0].get('duration'))
            if rendition_items else None)

        formats = []
        for rendition in rendition_items:
            src = rendition.find('./src')
            if src is None or not src.text:
                # A rendition without a source URL cannot be downloaded
                continue
            formats.append({
                # Replace everything up to the last "/r2" with the direct
                # download prefix, keeping the "r2..." remainder of the path
                'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', src.text),
                'width': int_or_none(rendition.get('width')),
                'height': int_or_none(rendition.get('height')),
                'tbr': int_or_none(rendition.get('bitrate')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'description': description,
            'age_limit': age_limit,
            'timestamp': timestamp,
        }
|
||||
|
||||
|
||||
class GameOnePlaylistIE(InfoExtractor):
    """Playlist extractor listing every Game One episode."""

    _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
    IE_NAME = 'gameone:playlist'
    _TEST = {
        'url': 'http://www.gameone.de/tv',
        'info_dict': {
            'title': 'GameOne',
        },
        'playlist_mincount': 294,
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per episode id.

        The overview page is always fetched from the fixed /tv address,
        whatever URL matched.  The highest episode number linked from it is
        taken as the newest episode, and an entry is produced for every id
        from that number down to 1.
        """
        overview = self._download_webpage('http://www.gameone.de/tv', 'TV')
        linked_ids = re.findall(r'<a href="/tv/(\d+)"', overview)
        newest_id = max(int(linked_id) for linked_id in linked_ids)

        # Newest episode first, counting down to episode 1
        entries = []
        for episode_id in reversed(range(1, newest_id + 1)):
            episode_url = 'http://www.gameone.de/tv/%d' % episode_id
            entries.append(self.url_result(episode_url, 'GameOne'))

        playlist = {'_type': 'playlist'}
        playlist['title'] = 'GameOne'
        playlist['entries'] = entries
        return playlist
|
@@ -77,11 +77,10 @@ from .instagram import InstagramIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .theplatform import ThePlatformIE
|
||||
from .vessel import VesselIE
|
||||
from .kaltura import KalturaIE
|
||||
from .eagleplatform import EaglePlatformIE
|
||||
from .facebook import FacebookIE
|
||||
from .soundcloud import SoundcloudIE
|
||||
from .soundcloud import SoundcloudEmbedIE
|
||||
from .tunein import TuneInBaseIE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .dbtv import DBTVIE
|
||||
@@ -119,6 +118,7 @@ from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
from .odnoklassniki import OdnoklassnikiIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -2075,6 +2075,22 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# Squarespace video embed, 2019-08-28
|
||||
'url': 'http://ootboxford.com',
|
||||
'info_dict': {
|
||||
'id': 'Tc7b_JGdZfw',
|
||||
'title': 'Out of the Blue, at Childish Things 10',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
|
||||
'uploader_id': 'helendouglashouse',
|
||||
'uploader': 'Helen & Douglas House',
|
||||
'upload_date': '20140328',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
@@ -2104,6 +2120,23 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
},
|
||||
{
|
||||
# DailyMotion embed with DM.player
|
||||
'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
|
||||
'info_dict': {
|
||||
'id': 'k6aKkGHd9FJs4mtJN39',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
|
||||
'description': 'This video is private.',
|
||||
'uploader_id': 'x1jf30l',
|
||||
'uploader': 'beIN SPORTS USA',
|
||||
'upload_date': '20190528',
|
||||
'timestamp': 1559062971,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@@ -2209,7 +2242,7 @@ class GenericIE(InfoExtractor):
|
||||
default_search = 'fixup_error'
|
||||
|
||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||
if '/' in url:
|
||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
||||
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||
return self.url_result('http://' + url)
|
||||
elif default_search != 'fixup_error':
|
||||
@@ -2378,6 +2411,12 @@ class GenericIE(InfoExtractor):
|
||||
# Unescaping the whole page allows to handle those cases in a generic way
|
||||
webpage = compat_urllib_parse_unquote(webpage)
|
||||
|
||||
# Unescape squarespace embeds to be detected by generic extractor,
|
||||
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||
webpage = re.sub(
|
||||
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
|
||||
lambda x: unescapeHTML(x.group(0)), webpage)
|
||||
|
||||
# it's tempting to parse this further, but you would
|
||||
# have to take into account all the variations like
|
||||
# Video Title - Site Name
|
||||
@@ -2452,11 +2491,6 @@ class GenericIE(InfoExtractor):
|
||||
if tp_urls:
|
||||
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
|
||||
|
||||
# Look for Vessel embeds
|
||||
vessel_urls = VesselIE._extract_urls(webpage)
|
||||
if vessel_urls:
|
||||
return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
@@ -2583,19 +2617,6 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group(1), 'Mpora')
|
||||
|
||||
# Look for embedded NovaMov-based player
|
||||
mobj = re.search(
|
||||
r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
|
||||
(?P<url>http://(?:(?:embed|www)\.)?
|
||||
(?:novamov\.com|
|
||||
nowvideo\.(?:ch|sx|eu|at|ag|co)|
|
||||
videoweed\.(?:es|com)|
|
||||
movshare\.(?:net|sx|ag)|
|
||||
divxstage\.(?:eu|net|ch|co|at|ag))
|
||||
/embed\.php.+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for embedded Facebook player
|
||||
facebook_urls = FacebookIE._extract_urls(webpage)
|
||||
if facebook_urls:
|
||||
@@ -2607,9 +2628,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(mobj.group('url'), 'VK')
|
||||
|
||||
# Look for embedded Odnoklassniki player
|
||||
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Odnoklassniki')
|
||||
odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
|
||||
if odnoklassniki_url:
|
||||
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
|
||||
|
||||
# Look for embedded ivi player
|
||||
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
|
||||
@@ -2728,9 +2749,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(myvi_url)
|
||||
|
||||
# Look for embedded soundcloud player
|
||||
soundcloud_urls = SoundcloudIE._extract_urls(webpage)
|
||||
soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
|
||||
if soundcloud_urls:
|
||||
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
|
||||
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
|
||||
|
||||
# Look for tunein player
|
||||
tunein_urls = TuneInBaseIE._extract_urls(webpage)
|
||||
@@ -2942,10 +2963,14 @@ class GenericIE(InfoExtractor):
|
||||
|
||||
# Look for Mangomolo embeds
|
||||
mobj = re.search(
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
|
||||
r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
|
||||
(?:
|
||||
admin\.mangomolo\.com/analytics/index\.php/customers/embed|
|
||||
player\.mangomolo\.com/v1
|
||||
)/
|
||||
(?:
|
||||
video\?.*?\bid=(?P<video_id>\d+)|
|
||||
index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
(?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
|
||||
).+?)\1''', webpage)
|
||||
if mobj is not None:
|
||||
info = {
|
||||
|
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^-/?#]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
@@ -44,12 +44,21 @@ class GfycatIE(InfoExtractor):
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -96,21 +96,31 @@ class GloboIE(InfoExtractor):
|
||||
video = self._download_json(
|
||||
'http://api.globovideos.com/videos/%s/playlist' % video_id,
|
||||
video_id)['videos'][0]
|
||||
if video.get('encrypted') is True:
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
title = video['title']
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for resource in video['resources']:
|
||||
resource_id = resource.get('_id')
|
||||
resource_url = resource.get('url')
|
||||
if not resource_id or not resource_url:
|
||||
resource_type = resource.get('type')
|
||||
if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
|
||||
continue
|
||||
|
||||
if resource_type == 'subtitle':
|
||||
subtitles.setdefault(resource.get('language') or 'por', []).append({
|
||||
'url': resource_url,
|
||||
})
|
||||
continue
|
||||
|
||||
security = self._download_json(
|
||||
'http://security.video.globo.com/videos/%s/hash' % video_id,
|
||||
video_id, 'Downloading security hash for %s' % resource_id, query={
|
||||
'player': 'flash',
|
||||
'version': '17.0.0.132',
|
||||
'player': 'desktop',
|
||||
'version': '5.19.1',
|
||||
'resource_id': resource_id,
|
||||
})
|
||||
|
||||
@@ -123,18 +133,23 @@ class GloboIE(InfoExtractor):
|
||||
continue
|
||||
|
||||
hash_code = security_hash[:2]
|
||||
received_time = security_hash[2:12]
|
||||
received_random = security_hash[12:22]
|
||||
received_md5 = security_hash[22:]
|
||||
|
||||
sign_time = compat_str(int(received_time) + 86400)
|
||||
padding = '%010d' % random.randint(1, 10000000000)
|
||||
if hash_code in ('04', '14'):
|
||||
received_time = security_hash[3:13]
|
||||
received_md5 = security_hash[24:]
|
||||
hash_prefix = security_hash[:23]
|
||||
elif hash_code in ('02', '12', '03', '13'):
|
||||
received_time = security_hash[2:12]
|
||||
received_md5 = security_hash[22:]
|
||||
padding += '1'
|
||||
hash_prefix = '05' + security_hash[:22]
|
||||
|
||||
md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
|
||||
padded_sign_time = compat_str(int(received_time) + 86400) + padding
|
||||
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
|
||||
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
|
||||
signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
|
||||
signed_hash = hash_prefix + padded_sign_time + signed_md5
|
||||
signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
|
||||
|
||||
signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
|
||||
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
|
||||
@@ -164,7 +179,8 @@ class GloboIE(InfoExtractor):
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
@@ -34,10 +34,23 @@ class GoIE(AdobePassIE):
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'resource_id': 'DisneyXD',
|
||||
},
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'resource_id': 'Disney',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:(?P<sub_domain>%s)\.)?go|
|
||||
(?P<sub_domain_2>abc|freeform|disneynow)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
|
||||
)
|
||||
''' % '|'.join(list(_SITE_INFO.keys()))
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
@@ -50,6 +63,7 @@ class GoIE(AdobePassIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This content is no longer available.',
|
||||
}, {
|
||||
'url': 'http://watchdisneyxd.go.com/doraemon',
|
||||
'info_dict': {
|
||||
@@ -57,6 +71,34 @@ class GoIE(AdobePassIE):
|
||||
'id': 'SH55574025',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
|
||||
'info_dict': {
|
||||
'id': 'VDKA3609139',
|
||||
'ext': 'mp4',
|
||||
'title': 'This Guilty Blood',
|
||||
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
|
||||
'info_dict': {
|
||||
'id': 'VDKA13435179',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Bet',
|
||||
'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
@@ -71,6 +113,9 @@ class GoIE(AdobePassIE):
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@@ -80,16 +125,21 @@ class GoIE(AdobePassIE):
|
||||
display_id)['video']
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
||||
default=None)
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||
), webpage, 'video id', default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
|
@@ -1,149 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class Go90IE(InfoExtractor):
    """Extractor for go90.com video and embed pages."""

    _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.go90.com/videos/84BUqjLpf9D',
        'md5': 'efa7670dbbbf21a7b07b360652b24a32',
        'info_dict': {
            'id': '84BUqjLpf9D',
            'ext': 'mp4',
            'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
            'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
            'timestamp': 1491868800,
            'upload_date': '20170411',
            'age_limit': 14,
        }
    }, {
        'url': 'https://www.go90.com/embed/261MflWkD3N',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    def _real_extract(self, url):
        """Fetch the item JSON for the video and turn it into an info dict.

        Raises ExtractorError (expected) when the API answers HTTP 400 with an
        error message or when the item is flagged as requiring DRM; a
        "region unavailable" message is reported as a geo restriction.
        """
        video_id = self._match_id(url)

        try:
            headers = self.geo_verification_headers()
            headers.update({
                'Content-Type': 'application/json; charset=utf-8',
            })
            video_data = self._download_json(
                'https://www.go90.com/api/view/items/' + video_id, video_id,
                headers=headers, data=b'{"client":"web","device_type":"pc"}')
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                # The body of a 400 response is JSON carrying the reason
                message = self._parse_json(e.cause.read().decode(), None)['error']['message']
                if 'region unavailable' in message:
                    self.raise_geo_restricted(countries=['US'])
                raise ExtractorError(message, expected=True)
            raise

        if video_data.get('requires_drm'):
            raise ExtractorError('This video is DRM protected.', expected=True)
        main_video_asset = video_data['main_video_asset']

        episode_number = int_or_none(video_data.get('episode_number'))
        series = None
        season = None
        season_id = None
        season_number = None
        # `or {}` also covers keys that are present but null, which a
        # .get(key, {}) default would not
        children = (video_data.get('__children') or {}).get('Item') or {}
        for metadata in children.values():
            if metadata.get('type') == 'show':
                series = metadata.get('title')
            elif metadata.get('type') == 'season':
                season = metadata.get('title')
                season_id = metadata.get('id')
                season_number = int_or_none(metadata.get('season_number'))

        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        thumbnails = []
        formats = []
        subtitles = {}
        # A missing or null 'assets' value must not abort extraction with a
        # TypeError; _sort_formats reports the absence of formats instead
        for asset in video_data.get('assets') or []:
            if asset.get('id') == main_video_asset:
                for source in asset.get('sources') or []:
                    source_location = source.get('location')
                    if not source_location:
                        continue
                    source_type = source.get('type')
                    if source_type == 'hls':
                        m3u8_formats = self._extract_m3u8_formats(
                            source_location, video_id, 'mp4',
                            'm3u8_native', m3u8_id='hls', fatal=False)
                        for f in m3u8_formats:
                            # Fill in height/bitrate from the variant URL
                            # when the manifest itself did not provide them
                            mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
                            if mobj:
                                height, tbr = mobj.groups()
                                height = int_or_none(height)
                                f.update({
                                    'height': f.get('height') or height,
                                    # width is derived assuming a 16:9 frame
                                    'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
                                    'tbr': f.get('tbr') or int_or_none(tbr),
                                })
                        formats.extend(m3u8_formats)
                    elif source_type == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            source_location, video_id, mpd_id='dash', fatal=False))
                    else:
                        formats.append({
                            'format_id': source.get('name'),
                            'url': source_location,
                            'width': int_or_none(source.get('width')),
                            'height': int_or_none(source.get('height')),
                            'tbr': int_or_none(source.get('bitrate')),
                        })

                for caption in asset.get('caption_metadata') or []:
                    caption_url = caption.get('source_url')
                    if not caption_url:
                        continue
                    # Captions without a usable language are filed under 'en'
                    subtitles.setdefault(caption.get('language') or 'en', []).append({
                        'url': caption_url,
                        'ext': determine_ext(caption_url, 'vtt'),
                    })
            elif asset.get('type') == 'image':
                asset_location = asset.get('location')
                if not asset_location:
                    continue
                thumbnails.append({
                    'url': asset_location,
                    'width': int_or_none(asset.get('width')),
                    'height': int_or_none(asset.get('height')),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video_data.get('short_description'),
            'like_count': int_or_none(video_data.get('like_count')),
            'timestamp': parse_iso8601(video_data.get('released_at')),
            'series': series,
            'episode': episode,
            'season': season,
            'season_id': season_id,
            'season_number': season_number,
            'episode_number': episode_number,
            'subtitles': subtitles,
            'age_limit': parse_age_limit(video_data.get('rating')),
        }
|
@@ -1,33 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HarkIE(InfoExtractor):
    """Extractor for hark.com audio clips."""

    _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
    _TEST = {
        'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        'md5': '6783a58491b47b92c7c1af5a77d4cbee',
        'info_dict': {
            'id': 'mmbzyhkgny',
            'ext': 'mp3',
            'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
            'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            'duration': 11,
        }
    }

    def _real_extract(self, url):
        """Look up the clip's JSON description and map it to an info dict.

        The clip id is the part of the URL slug before its first dash; the
        metadata is read from ``/clips/<id>.json``.
        """
        clip_id = self._match_id(url)
        json_url = 'http://www.hark.com/clips/%s.json' % clip_id
        clip = self._download_json(json_url, clip_id)

        # 'url' and 'name' are required (a missing key raises KeyError)
        info = {
            'id': clip_id,
            'url': clip['url'],
            'title': clip['name'],
        }
        # The remaining fields are optional and default to None
        optional_fields = (
            ('description', 'description'),
            ('thumbnail', 'image_original'),
            ('duration', 'duration'),
        )
        for info_key, clip_key in optional_fields:
            info[info_key] = clip.get(clip_key)
        return info
|
@@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage)
|
||||
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
def _make_kaltura_result(kaltura_url):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(kaltura_url, {'source_url': url}),
|
||||
@@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
|
||||
'description': description,
|
||||
}
|
||||
|
||||
kaltura_url = KalturaIE._extract_url(webpage)
|
||||
if kaltura_url:
|
||||
return _make_kaltura_result(kaltura_url)
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
|
||||
default=None, group='id')
|
||||
if kaltura_id:
|
||||
return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
|
||||
|
||||
yt_urls = YoutubeIE._extract_urls(webpage)
|
||||
if yt_urls:
|
||||
return self.playlist_from_matches(
|
||||
|
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
|
||||
@@ -126,6 +127,8 @@ class HotStarIE(HotStarBaseIE):
|
||||
format_url = url_or_none(playback_set.get('playbackUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_url = re.sub(
|
||||
r'(?<=//staragvod)(\d)', r'web\1', format_url)
|
||||
tags = str_or_none(playback_set.get('tagsCombination')) or ''
|
||||
if tags and 'encryption:plain' not in tags:
|
||||
continue
|
||||
@@ -133,7 +136,8 @@ class HotStarIE(HotStarBaseIE):
|
||||
try:
|
||||
if 'package:hls' in tags or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
format_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls'))
|
||||
elif 'package:dash' in tags or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash'))
|
||||
|
@@ -1,85 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
get_element_by_id,
|
||||
remove_end,
|
||||
)
|
||||
|
||||
|
||||
class IconosquareIE(InfoExtractor):
    """Extractor for iconosquare.com / statigr.am media pages."""

    _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://statigr.am/p/522207370455279102_24101272',
        'md5': '6eb93b882a3ded7c378ee1d6884b1814',
        'info_dict': {
            'id': '522207370455279102_24101272',
            'ext': 'mp4',
            'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
            'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
            'timestamp': 1376471991,
            'upload_date': '20130814',
            'uploader': 'aguynamedpatrick',
            'uploader_id': '24101272',
            'comment_count': int,
            'like_count': int,
        },
    }

    def _real_extract(self, url):
        """Parse the JSON held in the page's ``mediaJson`` element.

        Formats come from the required ``videos`` mapping.  All other
        metadata (caption, user, likes, comments, images) is optional and is
        read defensively.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        media = self._parse_json(
            get_element_by_id('mediaJson', webpage),
            video_id)

        formats = [{
            'url': f['url'],
            'format_id': format_id,
            'width': int_or_none(f.get('width')),
            'height': int_or_none(f.get('height'))
        } for format_id, f in media['videos'].items()]
        self._sort_formats(formats)

        title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')

        # dict.get(key, {}) only falls back when the key is missing; a key
        # holding an explicit JSON null yields None and would break the
        # chained .get() calls.  `or {}` covers both cases.
        caption = media.get('caption') or {}
        user = media.get('user') or {}
        comments_info = media.get('comments') or {}
        likes_info = media.get('likes') or {}

        timestamp = int_or_none(media.get('created_time') or caption.get('created_time'))
        description = caption.get('text')

        uploader = user.get('username')
        uploader_id = user.get('id')

        comment_count = int_or_none(comments_info.get('count'))
        like_count = int_or_none(likes_info.get('count'))

        thumbnails = [{
            'url': t['url'],
            'id': thumbnail_id,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height'))
        } for thumbnail_id, t in (media.get('images') or {}).items()]

        # Comments without a 'text' field are skipped
        comments = [{
            'id': comment.get('id'),
            'text': comment['text'],
            'timestamp': int_or_none(comment.get('created_time')),
            'author': (comment.get('from') or {}).get('full_name'),
            'author_id': (comment.get('from') or {}).get('username'),
        } for comment in comments_info.get('data') or [] if 'text' in comment]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'comment_count': comment_count,
            'like_count': like_count,
            'formats': formats,
            'comments': comments,
        }
|
@@ -22,7 +22,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
@@ -1,15 +1,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class InternetVideoArchiveIE(InfoExtractor):
|
||||
@@ -20,7 +18,7 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '194487',
|
||||
'ext': 'mp4',
|
||||
'title': 'KICK-ASS 2',
|
||||
'title': 'Kick-Ass 2',
|
||||
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
|
||||
},
|
||||
'params': {
|
||||
@@ -33,68 +31,34 @@ class InternetVideoArchiveIE(InfoExtractor):
|
||||
def _build_json_url(query):
|
||||
return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
|
||||
|
||||
@staticmethod
|
||||
def _build_xml_url(query):
|
||||
return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
|
||||
|
||||
def _real_extract(self, url):
|
||||
query = compat_urlparse.urlparse(url).query
|
||||
query_dic = compat_parse_qs(query)
|
||||
video_id = query_dic['publishedid'][0]
|
||||
|
||||
if '/player/' in url:
|
||||
configuration = self._download_json(url, video_id)
|
||||
|
||||
# There are multiple videos in the playlist while only the first one
|
||||
# matches the video played in browsers
|
||||
video_info = configuration['playlist'][0]
|
||||
title = video_info['title']
|
||||
|
||||
formats = []
|
||||
for source in video_info['sources']:
|
||||
file_url = source['file']
|
||||
if determine_ext(file_url) == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
if m3u8_formats:
|
||||
formats.extend(m3u8_formats)
|
||||
file_url = m3u8_formats[0]['url']
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
file_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
file_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
else:
|
||||
a_format = {
|
||||
'url': file_url,
|
||||
}
|
||||
|
||||
if source.get('label') and source['label'][-4:] == ' kbs':
|
||||
tbr = int_or_none(source['label'][:-4])
|
||||
a_format.update({
|
||||
'tbr': tbr,
|
||||
'format_id': 'http-%d' % tbr,
|
||||
})
|
||||
formats.append(a_format)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
description = video_info.get('description')
|
||||
thumbnail = video_info.get('image')
|
||||
else:
|
||||
configuration = self._download_xml(url, video_id)
|
||||
formats = [{
|
||||
'url': xpath_text(configuration, './file', 'file URL', fatal=True),
|
||||
}]
|
||||
thumbnail = xpath_text(configuration, './image', 'thumbnail')
|
||||
title = 'InternetVideoArchive video %s' % video_id
|
||||
description = None
|
||||
query = compat_parse_qs(compat_urlparse.urlparse(url).query)
|
||||
video_id = query['publishedid'][0]
|
||||
data = self._download_json(
|
||||
'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
|
||||
video_id, data=json.dumps({
|
||||
'customerid': query['customerid'][0],
|
||||
'publishedid': video_id,
|
||||
}).encode())
|
||||
title = data['Title']
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['VideoUrl'], video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
file_url = formats[0]['url']
|
||||
if '.ism/' in file_url:
|
||||
replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'description': description,
|
||||
'thumbnail': data.get('PosterUrl'),
|
||||
'description': data.get('Description'),
|
||||
}
|
||||
|
@@ -1,38 +1,26 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
import random
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import compat_str
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_duration
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class JamendoBaseIE(InfoExtractor):
    """Base class holding the page-metadata helper used by Jamendo extractors."""

    def _extract_meta(self, webpage, fatal=True):
        """Return a ``(title, artist, second)`` tuple parsed from ``webpage``.

        The title candidate comes from ``_og_search_title`` or, failing
        that, from the ``<title>`` tag; the ``| Jamendo Music`` suffix is
        then cut off.  When that yields nothing, the ``name`` meta tag is
        queried instead, and ``fatal`` is forwarded to that lookup only.

        ``artist`` and ``second`` are the two sides of an ``A - B`` title,
        or both ``None`` when the title does not have that form.
        """
        candidate = self._og_search_title(webpage, default=None)
        if not candidate:
            candidate = self._search_regex(
                r'<title>([^<]+)', webpage, 'title', default=None)

        title = candidate
        if candidate:
            title = self._search_regex(
                r'(.+?)\s*\|\s*Jamendo Music', candidate, 'title',
                default=None)
        if not title:
            title = self._html_search_meta(
                'name', webpage, 'title', fatal=fatal)

        # title may still be None here (non-fatal lookup), hence the `or ''`
        match = re.search(r'(.+) - (.+)', title or '')
        if match:
            artist, second = match.groups()
        else:
            artist, second = None, None
        return title, artist, second
|
||||
|
||||
|
||||
class JamendoIE(JamendoBaseIE):
|
||||
class JamendoIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
licensing\.jamendo\.com/[^/]+|
|
||||
(?:www\.)?jamendo\.com
|
||||
)
|
||||
/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
|
||||
/track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||
@@ -45,7 +33,9 @@ class JamendoIE(JamendoBaseIE):
|
||||
'artist': 'Maya Filipič',
|
||||
'track': 'Stories from Emona I',
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg'
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1217438117,
|
||||
'upload_date': '20080730',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
@@ -53,15 +43,20 @@ class JamendoIE(JamendoBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._VALID_URL_RE.match(url)
|
||||
track_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
|
||||
display_id)
|
||||
|
||||
title, artist, track = self._extract_meta(webpage)
|
||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
||||
models = self._parse_json(self._html_search_regex(
|
||||
r"data-bundled-models='([^']+)",
|
||||
webpage, 'bundled models'), track_id)
|
||||
track = models['track']['models'][0]
|
||||
title = track_name = track['name']
|
||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||
artist = get_model('artist')
|
||||
artist_name = artist.get('name')
|
||||
if artist_name:
|
||||
title = '%s - %s' % (artist_name, title)
|
||||
album = get_model('album')
|
||||
|
||||
formats = [{
|
||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||
@@ -77,31 +72,58 @@ class JamendoIE(JamendoBaseIE):
|
||||
))]
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._html_search_meta(
|
||||
'image', webpage, 'thumbnail', fatal=False)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
|
||||
webpage, 'duration', fatal=False))
|
||||
urls = []
|
||||
thumbnails = []
|
||||
for _, covers in track.get('cover', {}).items():
|
||||
for cover_id, cover_url in covers.items():
|
||||
if not cover_url or cover_url in urls:
|
||||
continue
|
||||
urls.append(cover_url)
|
||||
size = int_or_none(cover_id.lstrip('size'))
|
||||
thumbnails.append({
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
'width': size,
|
||||
'height': size,
|
||||
})
|
||||
|
||||
tags = []
|
||||
for tag in track.get('tags', []):
|
||||
tag_name = tag.get('name')
|
||||
if not tag_name:
|
||||
continue
|
||||
tags.append(tag_name)
|
||||
|
||||
stats = track.get('stats') or {}
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'display_id': display_id,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': thumbnails,
|
||||
'title': title,
|
||||
'duration': duration,
|
||||
'artist': artist,
|
||||
'track': track,
|
||||
'formats': formats
|
||||
'description': track.get('description'),
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
'artist': artist_name,
|
||||
'track': track_name,
|
||||
'album': album.get('name'),
|
||||
'formats': formats,
|
||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
||||
'timestamp': int_or_none(track.get('dateCreated')),
|
||||
'view_count': int_or_none(stats.get('listenedAll')),
|
||||
'like_count': int_or_none(stats.get('favorited')),
|
||||
'average_rating': int_or_none(stats.get('averageNote')),
|
||||
'tags': tags,
|
||||
}
|
||||
|
||||
|
||||
class JamendoAlbumIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
|
||||
class JamendoAlbumIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||
'info_dict': {
|
||||
'id': '121486',
|
||||
'title': 'Shearer - Duck On Cover'
|
||||
'title': 'Duck On Cover',
|
||||
'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'e1a2fcb42bda30dfac990212924149a8',
|
||||
@@ -111,6 +133,8 @@ class JamendoAlbumIE(JamendoBaseIE):
|
||||
'title': 'Shearer - Warmachine',
|
||||
'artist': 'Shearer',
|
||||
'track': 'Warmachine',
|
||||
'timestamp': 1368089771,
|
||||
'upload_date': '20130509',
|
||||
}
|
||||
}, {
|
||||
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
|
||||
@@ -120,6 +144,8 @@ class JamendoAlbumIE(JamendoBaseIE):
|
||||
'title': 'Shearer - Without Your Ghost',
|
||||
'artist': 'Shearer',
|
||||
'track': 'Without Your Ghost',
|
||||
'timestamp': 1368089771,
|
||||
'upload_date': '20130509',
|
||||
}
|
||||
}],
|
||||
'params': {
|
||||
@@ -127,24 +153,35 @@ class JamendoAlbumIE(JamendoBaseIE):
|
||||
}
|
||||
}
|
||||
|
||||
def _call_api(self, resource, resource_id):
|
||||
path = '/api/%ss' % resource
|
||||
rand = compat_str(random.random())
|
||||
return self._download_json(
|
||||
'https://www.jamendo.com' + path, resource_id, query={
|
||||
'id[]': resource_id,
|
||||
}, headers={
|
||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||
})[0]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._VALID_URL_RE.match(url)
|
||||
album_id = mobj.group('id')
|
||||
album_id = self._match_id(url)
|
||||
album = self._call_api('album', album_id)
|
||||
album_name = album.get('name')
|
||||
|
||||
webpage = self._download_webpage(url, mobj.group('display_id'))
|
||||
entries = []
|
||||
for track in album.get('tracks', []):
|
||||
track_id = track.get('id')
|
||||
if not track_id:
|
||||
continue
|
||||
track_id = compat_str(track_id)
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://www.jamendo.com/track/' + track_id,
|
||||
'ie_key': JamendoIE.ie_key(),
|
||||
'id': track_id,
|
||||
'album': album_name,
|
||||
})
|
||||
|
||||
title, artist, album = self._extract_meta(webpage, fatal=False)
|
||||
|
||||
entries = [{
|
||||
'_type': 'url_transparent',
|
||||
'url': compat_urlparse.urljoin(url, m.group('path')),
|
||||
'ie_key': JamendoIE.ie_key(),
|
||||
'id': self._search_regex(
|
||||
r'/track/(\d+)', m.group('path'), 'track id', default=None),
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
} for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
|
||||
webpage)]
|
||||
|
||||
return self.playlist_result(entries, album_id, title)
|
||||
return self.playlist_result(
|
||||
entries, album_id, album_name,
|
||||
clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
|
||||
|
@@ -7,7 +7,7 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class JWPlatformIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|video)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TESTS = [{
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
||||
|
@@ -6,14 +6,15 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class KakaoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
|
||||
_API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
|
||||
_VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
|
||||
_API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
|
||||
@@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor):
|
||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||
'uploader_id': 2653210,
|
||||
'uploader': '쇼 음악중심',
|
||||
'uploader': '쇼! 음악중심',
|
||||
'timestamp': 1485684628,
|
||||
'upload_date': '20170129',
|
||||
}
|
||||
@@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
display_id = video_id.rstrip('@my')
|
||||
api_base = self._API_BASE_TMPL % video_id
|
||||
|
||||
player_header = {
|
||||
'Referer': update_url_query(
|
||||
@@ -55,20 +58,23 @@ class KakaoIE(InfoExtractor):
|
||||
})
|
||||
}
|
||||
|
||||
QUERY_COMMON = {
|
||||
query = {
|
||||
'player': 'monet_html5',
|
||||
'referer': url,
|
||||
'uuid': '',
|
||||
'service': 'kakao_tv',
|
||||
'section': '',
|
||||
'dteType': 'PC',
|
||||
'fields': ','.join([
|
||||
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
||||
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
|
||||
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
|
||||
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
|
||||
}
|
||||
|
||||
query = QUERY_COMMON.copy()
|
||||
query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
|
||||
impress = self._download_json(
|
||||
'%s/%s/impress' % (self._API_BASE, video_id),
|
||||
video_id, 'Downloading video info',
|
||||
api_base + 'impress', display_id, 'Downloading video info',
|
||||
query=query, headers=player_header)
|
||||
|
||||
clip_link = impress['clipLink']
|
||||
@@ -76,32 +82,22 @@ class KakaoIE(InfoExtractor):
|
||||
|
||||
title = clip.get('title') or clip_link.get('displayTitle')
|
||||
|
||||
tid = impress.get('tid', '')
|
||||
|
||||
query = QUERY_COMMON.copy()
|
||||
query.update({
|
||||
'tid': tid,
|
||||
'profile': 'HIGH',
|
||||
})
|
||||
raw = self._download_json(
|
||||
'%s/%s/raw' % (self._API_BASE, video_id),
|
||||
video_id, 'Downloading video formats info',
|
||||
query=query, headers=player_header)
|
||||
query['tid'] = impress.get('tid', '')
|
||||
|
||||
formats = []
|
||||
for fmt in raw.get('outputList', []):
|
||||
for fmt in clip.get('videoOutputList', []):
|
||||
try:
|
||||
profile_name = fmt['profile']
|
||||
if profile_name == 'AUDIO':
|
||||
continue
|
||||
query.update({
|
||||
'profile': profile_name,
|
||||
'fields': '-*,url',
|
||||
})
|
||||
fmt_url_json = self._download_json(
|
||||
'%s/%s/raw/videolocation' % (self._API_BASE, video_id),
|
||||
video_id,
|
||||
api_base + 'raw/videolocation', display_id,
|
||||
'Downloading video URL for profile %s' % profile_name,
|
||||
query={
|
||||
'service': 'kakao_tv',
|
||||
'section': '',
|
||||
'tid': tid,
|
||||
'profile': profile_name
|
||||
}, headers=player_header, fatal=False)
|
||||
query=query, headers=player_header, fatal=False)
|
||||
|
||||
if fmt_url_json is None:
|
||||
continue
|
||||
@@ -113,7 +109,8 @@ class KakaoIE(InfoExtractor):
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'format_note': fmt.get('label'),
|
||||
'filesize': int_or_none(fmt.get('filesize'))
|
||||
'filesize': int_or_none(fmt.get('filesize')),
|
||||
'tbr': int_or_none(fmt.get('kbps')),
|
||||
})
|
||||
except KeyError:
|
||||
pass
|
||||
@@ -134,9 +131,9 @@ class KakaoIE(InfoExtractor):
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': display_id,
|
||||
'title': title,
|
||||
'description': clip.get('description'),
|
||||
'description': strip_or_none(clip.get('description')),
|
||||
'uploader': clip_link.get('channel', {}).get('name'),
|
||||
'uploader_id': clip_link.get('channelId'),
|
||||
'thumbnails': thumbs,
|
||||
@@ -146,4 +143,5 @@ class KakaoIE(InfoExtractor):
|
||||
'like_count': int_or_none(clip.get('likeCount')),
|
||||
'comment_count': int_or_none(clip.get('commentCount')),
|
||||
'formats': formats,
|
||||
'tags': clip.get('tagList'),
|
||||
}
|
||||
|
@@ -103,6 +103,11 @@ class KalturaIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# unavailable source format
|
||||
'url': 'kaltura:513551:1_66x4rg7o',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -146,14 +151,15 @@ class KalturaIE(InfoExtractor):
|
||||
if mobj:
|
||||
embed_info = mobj.groupdict()
|
||||
for k, v in embed_info.items():
|
||||
embed_info[k] = v.strip()
|
||||
if v:
|
||||
embed_info[k] = v.strip()
|
||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||
escaped_pid = re.escape(embed_info['partner_id'])
|
||||
service_url = re.search(
|
||||
r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||
service_mobj = re.search(
|
||||
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||
webpage)
|
||||
if service_url:
|
||||
url = smuggle_url(url, {'service_url': service_url.group(1)})
|
||||
if service_mobj:
|
||||
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
||||
return url
|
||||
|
||||
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
|
||||
@@ -306,12 +312,17 @@ class KalturaIE(InfoExtractor):
|
||||
f['fileExt'] = 'mp4'
|
||||
video_url = sign_url(
|
||||
'%s/flavorId/%s' % (data_url, f['id']))
|
||||
format_id = '%(fileExt)s-%(bitrate)s' % f
|
||||
# Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
|
||||
if f.get('isOriginal') is True and not self._is_valid_url(
|
||||
video_url, entry_id, format_id):
|
||||
continue
|
||||
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
|
||||
# -f mp4-56)
|
||||
vcodec = 'none' if 'videoCodecId' not in f and f.get(
|
||||
'frameRate') == 0 else f.get('videoCodecId')
|
||||
formats.append({
|
||||
'format_id': '%(fileExt)s-%(bitrate)s' % f,
|
||||
'format_id': format_id,
|
||||
'ext': f.get('fileExt'),
|
||||
'tbr': int_or_none(f['bitrate']),
|
||||
'fps': int_or_none(f.get('frameRate')),
|
||||
|
@@ -1,39 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class KeekIE(InfoExtractor):
    """Extractor for single video pages under ``keek.com/keek/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
    IE_NAME = 'keek'
    _TEST = {
        'url': 'https://www.keek.com/keek/NODfbab',
        'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
        'info_dict': {
            'id': 'NODfbab',
            'ext': 'mp4',
            'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
            'uploader': 'ytdl',
            'uploader_id': 'eGT5bab',
        },
    }

    def _real_extract(self, url):
        """Download the page for ``url`` and build the info dict from it.

        The media URL, title and thumbnail are read through the
        ``_og_search_*`` helpers; uploader name and id come from the
        ``data-username`` / ``data-user-id`` attributes (both non-fatal).
        """
        keek_id = self._match_id(url)
        page = self._download_webpage(url, keek_id)

        # Lookups are performed in the same order as the result keys below.
        media_url = self._og_search_video_url(page)
        # The title is taken from the description helper, stripped.
        title = self._og_search_description(page).strip()
        thumbnail = self._og_search_thumbnail(page)
        uploader = self._search_regex(
            r'data-username=(["\'])(?P<uploader>.+?)\1', page,
            'uploader', fatal=False, group='uploader')
        uploader_id = self._search_regex(
            r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', page,
            'uploader id', fatal=False, group='uploader_id')

        return {
            'id': keek_id,
            'url': media_url,
            'ext': 'mp4',
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
|
@@ -20,7 +20,7 @@ class LA7IE(InfoExtractor):
|
||||
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
|
||||
'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
|
||||
'info_dict': {
|
||||
'id': 'inccool8-02-10-2015-163722',
|
||||
'id': '0_42j6wd36',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inc.Cool8',
|
||||
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
|
||||
@@ -57,7 +57,7 @@ class LA7IE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
|
||||
'service_url': 'http://kdam.iltrovatore.it',
|
||||
'service_url': 'http://nkdam.iltrovatore.it',
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': player_data['title'],
|
||||
|
@@ -1,33 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class LearnrIE(InfoExtractor):
    """Extractor for ``learnr.pro/view/video/<id>`` pages.

    The page only wraps another video; the embedded ``videoId`` value is
    handed on as a ``url_transparent`` result (the test expects the
    ``Youtube`` extractor to be involved, see ``add_ie``).
    """

    _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
        'md5': '3719fdf0a68397f49899e82c308a89de',
        'info_dict': {
            'id': '51624',
            'ext': 'mp4',
            'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
            'description': 'md5:b36dbfa92350176cdf12b4d388485503',
            'uploader': 'LearnCode.academy',
            'uploader_id': 'learncodeacademy',
            'upload_date': '20131021',
        },
        'add_ie': ['Youtube'],
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the embedded video."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)

        # The value of the page's `videoId: '...'` setting is used as the
        # target URL of the transparent redirect.
        embedded_id = self._search_regex(
            r"videoId\s*:\s*'([^']+)'", page, 'youtube id')

        return {
            '_type': 'url_transparent',
            'url': embedded_id,
            'id': page_id,
        }
|
@@ -6,8 +6,8 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@@ -19,6 +19,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class LecturioBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
|
||||
_LOGIN_URL = 'https://app.lecturio.com/en/login'
|
||||
_NETRC_MACHINE = 'lecturio'
|
||||
|
||||
@@ -67,51 +68,56 @@ class LecturioIE(LecturioBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https://
|
||||
(?:
|
||||
app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.lecture|
|
||||
(?:www\.)?lecturio\.de/[^/]+/(?P<id_de>[^/?#&]+)\.vortrag
|
||||
app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
|
||||
(?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
|
||||
'md5': 'f576a797a5b7a5e4e4bbdfc25a6a6870',
|
||||
'md5': '9a42cf1d8282a6311bf7211bbde26fde',
|
||||
'info_dict': {
|
||||
'id': '39634',
|
||||
'ext': 'mp4',
|
||||
'title': 'Important Concepts and Terms – Introduction to Microbiology',
|
||||
'title': 'Important Concepts and Terms — Introduction to Microbiology',
|
||||
},
|
||||
'skip': 'Requires lecturio account credentials',
|
||||
}, {
|
||||
'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_CC_LANGS = {
|
||||
'Arabic': 'ar',
|
||||
'Bulgarian': 'bg',
|
||||
'German': 'de',
|
||||
'English': 'en',
|
||||
'Spanish': 'es',
|
||||
'Persian': 'fa',
|
||||
'French': 'fr',
|
||||
'Japanese': 'ja',
|
||||
'Polish': 'pl',
|
||||
'Pashto': 'ps',
|
||||
'Russian': 'ru',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id') or mobj.group('id_de')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'https://app.lecturio.com/en/lecture/%s/player.html' % display_id,
|
||||
display_id)
|
||||
|
||||
lecture_id = self._search_regex(
|
||||
r'lecture_id\s*=\s*(?:L_)?(\d+)', webpage, 'lecture id')
|
||||
|
||||
api_url = self._search_regex(
|
||||
r'lectureDataLink\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'api url', group='url')
|
||||
|
||||
video = self._download_json(api_url, display_id)
|
||||
|
||||
nt = mobj.group('nt') or mobj.group('nt_de')
|
||||
lecture_id = mobj.group('id')
|
||||
display_id = nt or lecture_id
|
||||
api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
|
||||
video = self._download_json(
|
||||
self._API_BASE_URL + api_path, display_id)
|
||||
title = video['title'].strip()
|
||||
if not lecture_id:
|
||||
pid = video.get('productId') or video.get('uid')
|
||||
if pid:
|
||||
spid = pid.split('_')
|
||||
if spid and len(spid) == 2:
|
||||
lecture_id = spid[1]
|
||||
|
||||
formats = []
|
||||
for format_ in video['content']['media']:
|
||||
@@ -129,24 +135,30 @@ class LecturioIE(LecturioBaseIE):
|
||||
continue
|
||||
label = str_or_none(format_.get('label'))
|
||||
filesize = int_or_none(format_.get('fileSize'))
|
||||
formats.append({
|
||||
f = {
|
||||
'url': file_url,
|
||||
'format_id': label,
|
||||
'filesize': float_or_none(filesize, invscale=1000)
|
||||
})
|
||||
}
|
||||
if label:
|
||||
mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
|
||||
if mobj:
|
||||
f.update({
|
||||
'format_id': mobj.group(2),
|
||||
'height': int(mobj.group(1)),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
automatic_captions = {}
|
||||
cc = self._parse_json(
|
||||
self._search_regex(
|
||||
r'subtitleUrls\s*:\s*({.+?})\s*,', webpage, 'subtitles',
|
||||
default='{}'), display_id, fatal=False)
|
||||
for cc_label, cc_url in cc.items():
|
||||
cc_url = url_or_none(cc_url)
|
||||
captions = video.get('captions') or []
|
||||
for cc in captions:
|
||||
cc_url = cc.get('url')
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = self._search_regex(
|
||||
cc_label = cc.get('translatedCode')
|
||||
lang = cc.get('languageCode') or self._search_regex(
|
||||
r'/([a-z]{2})_', cc_url, 'lang',
|
||||
default=cc_label.split()[0] if cc_label else 'en')
|
||||
original_lang = self._search_regex(
|
||||
@@ -160,7 +172,7 @@ class LecturioIE(LecturioBaseIE):
|
||||
})
|
||||
|
||||
return {
|
||||
'id': lecture_id,
|
||||
'id': lecture_id or nt,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
@@ -169,37 +181,40 @@ class LecturioIE(LecturioBaseIE):
|
||||
|
||||
|
||||
class LecturioCourseIE(LecturioBaseIE):
|
||||
_VALID_URL = r'https://app\.lecturio\.com/[^/]+/(?P<id>[^/?#&]+)\.course'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
|
||||
'info_dict': {
|
||||
'id': 'microbiology-introduction',
|
||||
'title': 'Microbiology: Introduction',
|
||||
'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
|
||||
},
|
||||
'playlist_count': 45,
|
||||
'skip': 'Requires lecturio account credentials',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://app.lecturio.com/#/course/c/6434',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nt, course_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = nt or course_id
|
||||
api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
|
||||
course = self._download_json(
|
||||
self._API_BASE_URL + api_path, display_id)
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<[^>]+\bdata-url=(["\'])(?:(?!\1).)+\.lecture\b[^>]+>',
|
||||
webpage):
|
||||
params = extract_attributes(mobj.group(0))
|
||||
lecture_url = urljoin(url, params.get('data-url'))
|
||||
lecture_id = params.get('data-id')
|
||||
for lecture in course.get('lectures', []):
|
||||
lecture_id = str_or_none(lecture.get('id'))
|
||||
lecture_url = lecture.get('url')
|
||||
if lecture_url:
|
||||
lecture_url = urljoin(url, lecture_url)
|
||||
else:
|
||||
lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
|
||||
entries.append(self.url_result(
|
||||
lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
|
||||
|
||||
title = self._search_regex(
|
||||
r'<span[^>]+class=["\']content-title[^>]+>([^<]+)', webpage,
|
||||
'title', default=None)
|
||||
|
||||
return self.playlist_result(entries, display_id, title)
|
||||
return self.playlist_result(
|
||||
entries, display_id, course.get('title'),
|
||||
clean_html(course.get('description')))
|
||||
|
||||
|
||||
class LecturioDeCourseIE(LecturioBaseIE):
|
||||
|
@@ -326,7 +326,7 @@ class LetvCloudIE(InfoExtractor):
|
||||
elif play_json.get('code'):
|
||||
raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
|
||||
else:
|
||||
raise ExtractorError('Letv cloud returned an unknwon error')
|
||||
raise ExtractorError('Letv cloud returned an unknown error')
|
||||
|
||||
def b64decode(s):
|
||||
return compat_b64decode(s).decode('utf-8')
|
||||
|
42
youtube_dl/extractor/livejournal.py
Normal file
42
youtube_dl/extractor/livejournal.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class LiveJournalIE(InfoExtractor):
    """Extractor for videos in LiveJournal video albums.

    The album page embeds a ``Site.page`` JSON object; its
    ``video.record`` entry supplies the metadata and the storage id that
    is passed on to the ``EaglePlatform`` extractor.
    """

    _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
    _TEST = {
        'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
        'md5': 'adaf018388572ced8a6f301ace49d4b2',
        'info_dict': {
            'id': '1263729',
            'ext': 'mp4',
            'title': 'Истребители против БПЛА',
            'upload_date': '20190624',
            'timestamp': 1561406715,
        }
    }

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the video behind ``url``."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        page_json = self._search_regex(
            r'Site\.page\s*=\s*({.+?});', page, 'page data')
        record = self._parse_json(page_json, video_id)['video']['record']

        # `storageid` is mandatory: it identifies the video on the
        # eagleplatform host referenced in the result URL.
        storage_id = compat_str(record['storageid'])

        name = record.get('name')
        # remove filename extension (.mp4, .mov, etc...) when a name is present
        title = name.rsplit('.', 1)[0] if name else name

        result = {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'thumbnail': record.get('thumbnail'),
            'timestamp': int_or_none(record.get('timecreate')),
        }
        result['url'] = 'eagleplatform:vc.videos.livejournal.com:' + storage_id
        result['ie_key'] = 'EaglePlatform'
        return result
|
@@ -82,6 +82,10 @@ class LiveLeakIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No original video
|
||||
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -134,11 +138,13 @@ class LiveLeakIE(InfoExtractor):
|
||||
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
|
||||
if a_format['url'] != orig_url:
|
||||
format_id = a_format.get('format_id')
|
||||
formats.append({
|
||||
'format_id': 'original' + ('-' + format_id if format_id else ''),
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
format_id = 'original' + ('-' + format_id if format_id else '')
|
||||
if self._is_valid_url(orig_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': orig_url,
|
||||
'preference': 1,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
info_dict['formats'] = formats
|
||||
|
||||
|
@@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Status="NotFound", Message="Transcript not found"
|
||||
'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _raise_unavailable(self, video_id):
|
||||
@@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE):
|
||||
|
||||
def _get_subtitles(self, video_id):
|
||||
url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
|
||||
subs = self._download_json(url, None, False)
|
||||
subs = self._download_webpage(
|
||||
url, video_id, 'Downloading subtitles JSON', fatal=False)
|
||||
if not subs or 'Status="NotFound"' in subs:
|
||||
return {}
|
||||
subs = self._parse_json(subs, video_id, fatal=False)
|
||||
if not subs:
|
||||
return {}
|
||||
fixed_subs = self._fix_subtitles(subs)
|
||||
if fixed_subs:
|
||||
return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
|
||||
else:
|
||||
return {}
|
||||
return {}
|
||||
|
||||
|
||||
class LyndaCourseIE(LyndaBaseIE):
|
||||
|
@@ -1,42 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MacGameStoreIE(InfoExtractor):
    """Extractor for trailers shown by the macgamestore.com media viewer."""

    IE_NAME = 'macgamestore'
    IE_DESC = 'MacGameStore trailers'
    _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
        'md5': '8649b8ea684b6666b4c5be736ecddc61',
        'info_dict': {
            'id': '2450',
            'ext': 'm4v',
            'title': 'Crow',
        }
    }

    def _real_extract(self, url):
        """Return the id, media URL and title of the trailer at *url*.

        Raises ExtractorError (marked as expected) when the page carries the
        "Missing Media" marker.
        """
        trailer_id = self._match_id(url)
        page = self._download_webpage(
            url, trailer_id, 'Downloading trailer page')

        # A page with this marker is reported as a non-existent trailer
        if '>Missing Media<' in page:
            raise ExtractorError(
                'Trailer %s does not exist' % trailer_id, expected=True)

        # Title comes from the <title> tag, media URL from the player block
        title = self._html_search_regex(
            r'<title>MacGameStore: (.*?) Trailer</title>', page, 'title')
        media_url = self._html_search_regex(
            r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
            page, 'video URL')

        info = {
            'id': trailer_id,
            'url': media_url,
            'title': title
        }
        return info
|
@@ -1,32 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MakerTVIE(InfoExtractor):
    """Extractor for maker.tv video pages and makerplayer.com embeds.

    The page only supplies a JW Platform id; the actual extraction is
    delegated to the JWPlatform extractor.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
    _TEST = {
        'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
        'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
        'info_dict': {
            'id': 'Fh3QgymL9gsc',
            'ext': 'mp4',
            'title': 'Maze Runner: The Scorch Trials Official Movie Review',
            'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
            'upload_date': '20150918',
            'timestamp': 1442549540,
        }
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the JW Platform media."""
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The id is exposed through a ``jwid="..."`` / ``jw_id="..."`` attribute
        jw_id = self._search_regex(
            r'jw_?id="([^"]+)"', page, 'jwplatform id')

        result = {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'jwplatform:%s' % jw_id,
            'ie_key': 'JWPlatform',
        }
        return result
|
@@ -10,18 +10,21 @@ from ..utils import int_or_none
|
||||
|
||||
|
||||
class MangomoloBaseIE(InfoExtractor):
|
||||
_BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
return page_id
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._get_real_id(self._match_id(url))
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
|
||||
hidden_inputs = self._hidden_inputs(webpage)
|
||||
m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
|
||||
|
||||
format_url = self._html_search_regex(
|
||||
[
|
||||
r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
|
||||
r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
|
||||
r'<a[^>]+href="(rtsp://[^"]+)"'
|
||||
], webpage, 'format url')
|
||||
formats = self._extract_wowza_formats(
|
||||
@@ -39,14 +42,16 @@ class MangomoloBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class MangomoloVideoIE(MangomoloBaseIE):
|
||||
IE_NAME = 'mangomolo:video'
|
||||
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
|
||||
_TYPE = 'video'
|
||||
IE_NAME = 'mangomolo:' + _TYPE
|
||||
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
|
||||
_IS_LIVE = False
|
||||
|
||||
|
||||
class MangomoloLiveIE(MangomoloBaseIE):
|
||||
IE_NAME = 'mangomolo:live'
|
||||
_VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
|
||||
_TYPE = 'live'
|
||||
IE_NAME = 'mangomolo:' + _TYPE
|
||||
_VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
|
||||
_IS_LIVE = True
|
||||
|
||||
def _get_real_id(self, page_id):
|
||||
|
@@ -27,7 +27,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
||||
player/index\.html\?.*?\bprogramGuid=
|
||||
)
|
||||
)(?P<id>[0-9A-Z]{16})
|
||||
)(?P<id>[0-9A-Z]{16,})
|
||||
'''
|
||||
_TESTS = [{
|
||||
# full episode
|
||||
@@ -62,7 +62,6 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
'uploader': 'Canale 5',
|
||||
'uploader_id': 'C5',
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 403: Forbidden'],
|
||||
}, {
|
||||
# clip
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
|
||||
@@ -78,6 +77,18 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
}, {
|
||||
'url': 'mediaset:FAFU000000665924',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -109,6 +120,11 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
entries.append(embed_url)
|
||||
return entries
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
    """Rewrite Mediaset DASH manifest URLs, then defer to the base SMIL parser.

    For every ``<video>`` element whose ``src`` points at a
    ``vod05t-mediaset-it.akamaized.net`` ``.mpd`` manifest, the host is
    changed to ``vod05-...`` (the ``t`` is dropped) and the query string is
    removed.  All other ``src`` values are left untouched.

    The arguments are passed through unchanged to the parent class'
    ``_parse_smil_formats``, whose return value is returned.
    """
    for video in smil.findall(self._xpath_ns('.//video', namespace)):
        src = video.get('src')
        # Nothing to rewrite on elements without a source; leave them as-is
        # for the parent implementation instead of raising KeyError here.
        if not src:
            continue
        # The dot before "mpd" is escaped so only real ".mpd" manifests match
        video.attrib['src'] = re.sub(
            r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?\.mpd)\?.+',
            r'\1\2', src)
    # Explicit two-argument super(): the zero-argument form is Python 3 only
    return super(MediasetIE, self)._parse_smil_formats(
        smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
guid = self._match_id(url)
|
||||
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
|
||||
@@ -118,14 +134,15 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
subtitles = {}
|
||||
first_e = None
|
||||
for asset_type in ('SD', 'HD'):
|
||||
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
|
||||
# TODO: fixup ISM+none manifest URLs
|
||||
for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
|
||||
try:
|
||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
|
||||
'mbr': 'true',
|
||||
'formats': f,
|
||||
'assetTypes': asset_type,
|
||||
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
|
||||
}), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
|
||||
except ExtractorError as e:
|
||||
if not first_e:
|
||||
first_e = e
|
||||
|
@@ -79,6 +79,10 @@ class MGTVIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'tbr': tbr,
|
||||
'protocol': 'm3u8_native',
|
||||
'http_headers': {
|
||||
'Referer': url,
|
||||
},
|
||||
'format_note': stream.get('name'),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
|
@@ -65,30 +65,6 @@ class TechTVMITIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class MITIE(TechTVMITIE):
    """Extractor for video.mit.edu watch pages.

    The watch page embeds the player in an iframe; the iframe URL is
    returned as a URL result for another extractor to resolve.
    """

    IE_NAME = 'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        'md5': '7db01d5ccc1895fc5010e9c9e13648da',
        'info_dict': {
            'id': '21783',
            'ext': 'mp4',
            'title': 'The Government is Profiling You',
            'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a URL result for the first iframe ``src`` found on the page."""
        # The URL slug doubles as the display id for the download step
        slug = re.match(self._VALID_URL, url).group('title')
        page = self._download_webpage(url, slug)
        iframe_src = self._search_regex(
            r'<iframe .*?src="(.+?)"', page, 'embed url')
        return self.url_result(iframe_src)
|
||||
|
||||
|
||||
class OCWMITIE(InfoExtractor):
|
||||
IE_NAME = 'ocw.mit.edu'
|
||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||
|
@@ -86,9 +86,10 @@ class MixcloudIE(InfoExtractor):
|
||||
r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
|
||||
webpage, 'play info'), 'play info')
|
||||
for item in full_info_json:
|
||||
item_data = try_get(
|
||||
item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
|
||||
dict)
|
||||
item_data = try_get(item, [
|
||||
lambda x: x['cloudcast']['data']['cloudcastLookup'],
|
||||
lambda x: x['cloudcastLookup']['data']['cloudcastLookup'],
|
||||
], dict)
|
||||
if try_get(item_data, lambda x: x['streamInfo']['url']):
|
||||
info_json = item_data
|
||||
break
|
||||
@@ -164,7 +165,7 @@ class MixcloudIE(InfoExtractor):
|
||||
def decrypt_url(f_url):
|
||||
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
|
||||
decrypted_url = self._decrypt_xor_cipher(k, f_url)
|
||||
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
|
||||
if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
|
||||
return decrypted_url
|
||||
|
||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user