mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 22:28:37 +09:00
Compare commits
500 Commits
2018.05.01
...
2018.12.09
Author | SHA1 | Date | |
---|---|---|---|
![]() |
cefe42c412 | ||
![]() |
24cc64254c | ||
![]() |
9e02c2c704 | ||
![]() |
5ee7ae5c75 | ||
![]() |
3ad6dabd33 | ||
![]() |
5f47a60c5d | ||
![]() |
1bab343704 | ||
![]() |
1d88b3e6e6 | ||
![]() |
9235b5091c | ||
![]() |
c3c098dcf2 | ||
![]() |
8c5879715f | ||
![]() |
ebb0449049 | ||
![]() |
dfe0a3a9d2 | ||
![]() |
c976873c5b | ||
![]() |
15699ec8b0 | ||
![]() |
33cc1ea586 | ||
![]() |
ae9d77dab5 | ||
![]() |
8bb0c9cc16 | ||
![]() |
5547014ad9 | ||
![]() |
ab896fa894 | ||
![]() |
1fa59a928e | ||
![]() |
ce18a19be9 | ||
![]() |
1ead840d2c | ||
![]() |
aa374bc78e | ||
![]() |
3430ff9b07 | ||
![]() |
f012823082 | ||
![]() |
16597c2f94 | ||
![]() |
adbbdefc81 | ||
![]() |
053e5b12b2 | ||
![]() |
d9df8f120b | ||
![]() |
ca01d17884 | ||
![]() |
d19600df07 | ||
![]() |
641e86e3cf | ||
![]() |
6864855eb1 | ||
![]() |
d861a9d581 | ||
![]() |
66173211c4 | ||
![]() |
6f2883a2df | ||
![]() |
560020da30 | ||
![]() |
305ce767d5 | ||
![]() |
157eef3e63 | ||
![]() |
bd2d553c7b | ||
![]() |
af60e81e3c | ||
![]() |
a843464a7e | ||
![]() |
6866f24494 | ||
![]() |
4e33e0792a | ||
![]() |
35328915b5 | ||
![]() |
6c882aa899 | ||
![]() |
183417a50f | ||
![]() |
6a6d7f0641 | ||
![]() |
05bd5e9c77 | ||
![]() |
15ed5a2784 | ||
![]() |
2e1280ed43 | ||
![]() |
8578ea4dcb | ||
![]() |
9b27a78a88 | ||
![]() |
964b989dc8 | ||
![]() |
f97c099131 | ||
![]() |
1febf99da1 | ||
![]() |
4167148fa4 | ||
![]() |
5bb0479269 | ||
![]() |
02df855e13 | ||
![]() |
006374e3ae | ||
![]() |
11d19ff503 | ||
![]() |
a640c4d226 | ||
![]() |
d0058c76d5 | ||
![]() |
0919cd4d01 | ||
![]() |
2599956c9f | ||
![]() |
9b9b3501c5 | ||
![]() |
730c0d12a0 | ||
![]() |
f17a24a6df | ||
![]() |
83852e57bf | ||
![]() |
96a91b1551 | ||
![]() |
cab26223bf | ||
![]() |
532782ade1 | ||
![]() |
f81d44aab6 | ||
![]() |
2511eee215 | ||
![]() |
0df514f07e | ||
![]() |
432cd48410 | ||
![]() |
c0345b825f | ||
![]() |
2004e2210b | ||
![]() |
16d896b2a7 | ||
![]() |
22e07ce502 | ||
![]() |
dbdaaa231a | ||
![]() |
38c32dbf19 | ||
![]() |
a085410936 | ||
![]() |
6895ea4d3f | ||
![]() |
faac1c1f70 | ||
![]() |
573531dcfb | ||
![]() |
da56fb631f | ||
![]() |
95e42d7336 | ||
![]() |
cf0db4d997 | ||
![]() |
036f905161 | ||
![]() |
4b6aca17cc | ||
![]() |
c620694c97 | ||
![]() |
061ea3a776 | ||
![]() |
c70ba664f1 | ||
![]() |
f16679e843 | ||
![]() |
b14475724b | ||
![]() |
aa7e974a2a | ||
![]() |
9aac22c195 | ||
![]() |
94db1f7f3b | ||
![]() |
ffa7b2bfee | ||
![]() |
2943397e87 | ||
![]() |
9c4a83a1be | ||
![]() |
9ff558f67f | ||
![]() |
c2fe21efaa | ||
![]() |
476cf548e1 | ||
![]() |
bebef10909 | ||
![]() |
4c237ab787 | ||
![]() |
a1d1c63678 | ||
![]() |
1fafb32984 | ||
![]() |
c901cc38e5 | ||
![]() |
022218f2f0 | ||
![]() |
08c7d3dade | ||
![]() |
5e733b066a | ||
![]() |
7d9e858132 | ||
![]() |
b99b0bcfa0 | ||
![]() |
baeabf7742 | ||
![]() |
582797d780 | ||
![]() |
160c2773f6 | ||
![]() |
ee5fe42e44 | ||
![]() |
f0ee386851 | ||
![]() |
a94e7c195e | ||
![]() |
5d90a8a5f3 | ||
![]() |
19a352854f | ||
![]() |
c9d891f19a | ||
![]() |
d96f976b0c | ||
![]() |
2e7ed29e34 | ||
![]() |
21c1a00dd7 | ||
![]() |
0082f44a08 | ||
![]() |
f60b9803a4 | ||
![]() |
d98cb62e55 | ||
![]() |
05e7c184da | ||
![]() |
66d106f270 | ||
![]() |
3c7da54c92 | ||
![]() |
9795d93316 | ||
![]() |
365343131d | ||
![]() |
85fa80d5f9 | ||
![]() |
245cbb33bc | ||
![]() |
85cd69adcb | ||
![]() |
4c89a675dd | ||
![]() |
3d3499742c | ||
![]() |
c17e100b96 | ||
![]() |
8fd12a0831 | ||
![]() |
60ce0c67fd | ||
![]() |
cd5a74a28e | ||
![]() |
f6d7f7b474 | ||
![]() |
21160a1792 | ||
![]() |
4ac73fc170 | ||
![]() |
28fcb7b061 | ||
![]() |
3a9c928426 | ||
![]() |
d9b1cec171 | ||
![]() |
e504b09070 | ||
![]() |
c8f6ab8c38 | ||
![]() |
e2f61598be | ||
![]() |
c11485162b | ||
![]() |
1084563eaa | ||
![]() |
d9b0d118ad | ||
![]() |
8b40c92724 | ||
![]() |
3661ebf2b6 | ||
![]() |
0e7b8d3eac | ||
![]() |
127103b643 | ||
![]() |
d03beddf0f | ||
![]() |
dd4c449219 | ||
![]() |
6f1f59f39c | ||
![]() |
15bf2ca0da | ||
![]() |
0f2aa0dcaa | ||
![]() |
db348e8849 | ||
![]() |
f5b0175349 | ||
![]() |
79facb2773 | ||
![]() |
96dbf70de6 | ||
![]() |
8476b4fd91 | ||
![]() |
14f577e31c | ||
![]() |
25d110be30 | ||
![]() |
a2637a2dda | ||
![]() |
2e4350eec6 | ||
![]() |
2c9d3b9962 | ||
![]() |
13ef64fd93 | ||
![]() |
6f9f3340bb | ||
![]() |
ae2384ff5f | ||
![]() |
d0de6a287a | ||
![]() |
d0c5fabc12 | ||
![]() |
ad98d2eb74 | ||
![]() |
a41a506077 | ||
![]() |
9a47fa35dd | ||
![]() |
2d4fe594c6 | ||
![]() |
09322cccdb | ||
![]() |
aa1d5eb905 | ||
![]() |
93284ff2ea | ||
![]() |
0a9a8118ce | ||
![]() |
3d08f63dc5 | ||
![]() |
27d8e089a2 | ||
![]() |
7bbc1b189a | ||
![]() |
0b87e88453 | ||
![]() |
4d59db5b90 | ||
![]() |
4627995882 | ||
![]() |
7f2611cb5b | ||
![]() |
54a5be4dba | ||
![]() |
ed6919e737 | ||
![]() |
2b83da2463 | ||
![]() |
c1a37eb24a | ||
![]() |
4991e16c2a | ||
![]() |
14b7a24c19 | ||
![]() |
73f3bdbeb4 | ||
![]() |
9e21e6d96b | ||
![]() |
8959018a5f | ||
![]() |
eebbce5656 | ||
![]() |
56213aff1d | ||
![]() |
409b9324da | ||
![]() |
02df41354c | ||
![]() |
dd88fd65a5 | ||
![]() |
287cf7e443 | ||
![]() |
dac6f7654a | ||
![]() |
e0b6e98871 | ||
![]() |
beff09505c | ||
![]() |
135e6a1c10 | ||
![]() |
c707d2067d | ||
![]() |
4c86163b60 | ||
![]() |
b662273989 | ||
![]() |
df4d817bc3 | ||
![]() |
db192b2932 | ||
![]() |
52007de8ca | ||
![]() |
28f96cf407 | ||
![]() |
eda86b4335 | ||
![]() |
bf1245d236 | ||
![]() |
6f356cbbcf | ||
![]() |
0a74b45191 | ||
![]() |
d6ef8b4dd4 | ||
![]() |
60c0856223 | ||
![]() |
57c68ec4c3 | ||
![]() |
24e0cd709f | ||
![]() |
4779420ce8 | ||
![]() |
de4c41b437 | ||
![]() |
b65e3b0636 | ||
![]() |
d37dc6e1c9 | ||
![]() |
a62460aa21 | ||
![]() |
d588d4a5a6 | ||
![]() |
81cc22bab6 | ||
![]() |
20f96f64bd | ||
![]() |
af322eb830 | ||
![]() |
cb1c3a3c07 | ||
![]() |
48afc6ca3e | ||
![]() |
644921b372 | ||
![]() |
19b9de13c4 | ||
![]() |
6f2d82a5a0 | ||
![]() |
7ff129d3ea | ||
![]() |
9d1b213845 | ||
![]() |
5484828418 | ||
![]() |
4eecef84f3 | ||
![]() |
b2286f8fb2 | ||
![]() |
4938c8d573 | ||
![]() |
1a88fc5a69 | ||
![]() |
38e87f6c2a | ||
![]() |
ec240a4369 | ||
![]() |
cd3a3ff93b | ||
![]() |
9a984265b9 | ||
![]() |
a098c99f0d | ||
![]() |
8e37a7e4cc | ||
![]() |
722f1a0f8f | ||
![]() |
0c7b4f49eb | ||
![]() |
ad1bc71a8a | ||
![]() |
b5dec62ca6 | ||
![]() |
631f93ee2d | ||
![]() |
d4e7065111 | ||
![]() |
234a85858c | ||
![]() |
a789d1cc90 | ||
![]() |
694079dff7 | ||
![]() |
d94fb1225e | ||
![]() |
7930f91494 | ||
![]() |
a702056fbe | ||
![]() |
8fd2a7be37 | ||
![]() |
6de82b4476 | ||
![]() |
8e66ffc3b7 | ||
![]() |
6f27998e75 | ||
![]() |
3052a30d42 | ||
![]() |
4ecf300d13 | ||
![]() |
af03000ad5 | ||
![]() |
b96b4be461 | ||
![]() |
edb0e17188 | ||
![]() |
e9c671d5e8 | ||
![]() |
fd62b36680 | ||
![]() |
25586c601c | ||
![]() |
ecb6b6ae2d | ||
![]() |
c258570edd | ||
![]() |
6fc09f0155 | ||
![]() |
11330f5121 | ||
![]() |
8da17f9680 | ||
![]() |
c63f5fb863 | ||
![]() |
38f1eb0ac3 | ||
![]() |
371dcc1dd4 | ||
![]() |
bd21ead2a2 | ||
![]() |
905eef2b06 | ||
![]() |
79367a9820 | ||
![]() |
40a051fa9f | ||
![]() |
7e8e948cf7 | ||
![]() |
4b3ee09886 | ||
![]() |
79fd7320e2 | ||
![]() |
0685d9727b | ||
![]() |
e06632e3fe | ||
![]() |
69fcdb845b | ||
![]() |
6868d272e5 | ||
![]() |
4742150788 | ||
![]() |
4e71dfd819 | ||
![]() |
1ed0b2f74d | ||
![]() |
e15141adae | ||
![]() |
94fef94d9c | ||
![]() |
9a6628aaf9 | ||
![]() |
689af4960e | ||
![]() |
d5de0f21b9 | ||
![]() |
24d26ab380 | ||
![]() |
836ef4840f | ||
![]() |
5621c3222e | ||
![]() |
db5debf313 | ||
![]() |
8cee692b8b | ||
![]() |
973b6ceebb | ||
![]() |
eca1f0d115 | ||
![]() |
2160768a21 | ||
![]() |
267d81962a | ||
![]() |
9cf648c92b | ||
![]() |
5e8e2fa51f | ||
![]() |
d4a24f4091 | ||
![]() |
acbd0ff5df | ||
![]() |
7b393f9cc5 | ||
![]() |
c3bcd206eb | ||
![]() |
1f6cc5807e | ||
![]() |
c306f076ec | ||
![]() |
a0949fec08 | ||
![]() |
74caf528bc | ||
![]() |
9fb62e35f6 | ||
![]() |
b71cc71910 | ||
![]() |
a4ec45179e | ||
![]() |
30374f4d40 | ||
![]() |
91aa502d91 | ||
![]() |
f51f526b0a | ||
![]() |
c9b983ff82 | ||
![]() |
e730508827 | ||
![]() |
8b4b400aef | ||
![]() |
e12b4b8bcc | ||
![]() |
18806e3b6b | ||
![]() |
713afa705c | ||
![]() |
721a877d2f | ||
![]() |
9283d4ea03 | ||
![]() |
00a429bea3 | ||
![]() |
d391b7e23d | ||
![]() |
075a13d3e9 | ||
![]() |
8ba84e4600 | ||
![]() |
858cf4dc29 | ||
![]() |
9e761fe6f5 | ||
![]() |
ce0edda0f9 | ||
![]() |
0adf213d8c | ||
![]() |
8b183bd5f8 | ||
![]() |
1882511754 | ||
![]() |
764cd4e6f3 | ||
![]() |
734d461ca0 | ||
![]() |
81c5df4f2c | ||
![]() |
87f89dacdd | ||
![]() |
9b0b627534 | ||
![]() |
61cb66830f | ||
![]() |
c797db4a2f | ||
![]() |
03eef0f032 | ||
![]() |
aa56061627 | ||
![]() |
18d66f0410 | ||
![]() |
f15f7a674b | ||
![]() |
9aca7fe6a3 | ||
![]() |
e0671819e7 | ||
![]() |
5d6c81b63f | ||
![]() |
dc53c78634 | ||
![]() |
7dc9c60b4b | ||
![]() |
e51752754d | ||
![]() |
0645be49cb | ||
![]() |
a572ae6114 | ||
![]() |
b2df66aeca | ||
![]() |
93cffb1444 | ||
![]() |
d253df2f65 | ||
![]() |
e8c6afc168 | ||
![]() |
cc37cc3f99 | ||
![]() |
9d581efe05 | ||
![]() |
ff2e486221 | ||
![]() |
6ae36035d9 | ||
![]() |
9afd74d705 | ||
![]() |
2e6975306a | ||
![]() |
06ea7bdd99 | ||
![]() |
d7be705308 | ||
![]() |
2e190c2ad9 | ||
![]() |
94418c8eb3 | ||
![]() |
f7560859a3 | ||
![]() |
c6c478f40d | ||
![]() |
c3023e9f2e | ||
![]() |
77053237c5 | ||
![]() |
b6b2ccb72f | ||
![]() |
0a10f50e2f | ||
![]() |
6d155707e6 | ||
![]() |
eb6793ba97 | ||
![]() |
7e72694b5e | ||
![]() |
936784b272 | ||
![]() |
003fe73ccf | ||
![]() |
1ea559c445 | ||
![]() |
19e42ead9b | ||
![]() |
73c938e460 | ||
![]() |
9b89daefa6 | ||
![]() |
9d082e7cb8 | ||
![]() |
f20f636596 | ||
![]() |
b995043ab8 | ||
![]() |
85750f8972 | ||
![]() |
926d97fc6b | ||
![]() |
2593725a9b | ||
![]() |
0bfdcc1495 | ||
![]() |
c3f75e2454 | ||
![]() |
3a8e3730c1 | ||
![]() |
acca2ac7f3 | ||
![]() |
128b58ad13 | ||
![]() |
4fd1437d9d | ||
![]() |
e425710554 | ||
![]() |
bc3143ac5e | ||
![]() |
e0d42dd4b2 | ||
![]() |
a07879d6b2 | ||
![]() |
cfd7f2a636 | ||
![]() |
9c65c4a6cd | ||
![]() |
c9e12a618c | ||
![]() |
8882840ec5 | ||
![]() |
2ce35d9f43 | ||
![]() |
f16f48779c | ||
![]() |
ddd8486a44 | ||
![]() |
68217024e8 | ||
![]() |
ec2f3d2800 | ||
![]() |
8b1da46e8f | ||
![]() |
2a49d01992 | ||
![]() |
261f47306c | ||
![]() |
c0fd20abca | ||
![]() |
986c0b0215 | ||
![]() |
97b01144bd | ||
![]() |
56cd31f320 | ||
![]() |
c678192af3 | ||
![]() |
0934c9d4fa | ||
![]() |
38e4e8ab80 | ||
![]() |
5a16c9d9d3 | ||
![]() |
bdbcc8eecb | ||
![]() |
9ef5cdb5cb | ||
![]() |
03fad17cb6 | ||
![]() |
f4d261b765 | ||
![]() |
aee36ca832 | ||
![]() |
2a7c6befc1 | ||
![]() |
b39f42ee92 | ||
![]() |
6bd499e8ca | ||
![]() |
f2fc63a5a8 | ||
![]() |
c561b75c82 | ||
![]() |
3d2a643fdc | ||
![]() |
e8e58c2278 | ||
![]() |
1139935db7 | ||
![]() |
ca0aef42d4 | ||
![]() |
3bb3ff38a1 | ||
![]() |
268e132dec | ||
![]() |
670dcba8c7 | ||
![]() |
b836118724 | ||
![]() |
57d6792024 | ||
![]() |
b89ac53455 | ||
![]() |
d81ffc3aa0 | ||
![]() |
e518749300 | ||
![]() |
db2058f63e | ||
![]() |
5c766952dc | ||
![]() |
504f20dd30 | ||
![]() |
f2b1fa07ec | ||
![]() |
acd620c930 | ||
![]() |
27694fe7ad | ||
![]() |
0167f0dbfe | ||
![]() |
7550ea501a | ||
![]() |
58197205d3 | ||
![]() |
361a965b5c | ||
![]() |
a3f86160fa | ||
![]() |
1306f5ed72 | ||
![]() |
58a68d8fda | ||
![]() |
eea2fafcf5 | ||
![]() |
6843ac5b13 | ||
![]() |
54fc90aabf | ||
![]() |
997530d9d4 | ||
![]() |
fe3a60f040 | ||
![]() |
7f34984e81 | ||
![]() |
1e4fe5a7cc | ||
![]() |
c63ca0eef8 | ||
![]() |
84a9fef899 | ||
![]() |
4c76aa0666 | ||
![]() |
90b633f86b | ||
![]() |
07acdc5afc | ||
![]() |
49fa7de301 | ||
![]() |
dbd5c502ea | ||
![]() |
bc5e4aa57e | ||
![]() |
1344d3e169 | ||
![]() |
ff8889cd4d | ||
![]() |
9e18bb4c67 | ||
![]() |
44277998ad | ||
![]() |
05108a496a | ||
![]() |
2fbd86352e | ||
![]() |
0ce76801e8 | ||
![]() |
789b7774a7 | ||
![]() |
660a230b2d | ||
![]() |
a90a6b54ee | ||
![]() |
3cc0d0b829 | ||
![]() |
ea1f5e5dbd | ||
![]() |
5f95927a62 | ||
![]() |
a93ce61bd5 | ||
![]() |
c18142da6e |
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.01**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.09**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.05.01
|
||||
[debug] youtube-dl version 2018.12.09
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@@ -47,3 +47,7 @@ youtube-dl.zsh
|
||||
*.iml
|
||||
|
||||
tmp/
|
||||
venv/
|
||||
|
||||
# VS Code related files
|
||||
.vscode
|
||||
|
12
.travis.yml
12
.travis.yml
@@ -15,6 +15,18 @@ env:
|
||||
- YTDL_TEST_SET=download
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=core
|
||||
- python: 3.8-dev
|
||||
dist: xenial
|
||||
env: YTDL_TEST_SET=download
|
||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||
fast_finish: true
|
||||
|
7
AUTHORS
7
AUTHORS
@@ -239,3 +239,10 @@ Martin Weinelt
|
||||
Surya Oktafendri
|
||||
TingPing
|
||||
Alexandre Macabies
|
||||
Bastian de Groot
|
||||
Niklas Haas
|
||||
András Veres-Szentkirályi
|
||||
Enes Solak
|
||||
Nathan Rossi
|
||||
Thomas van der Berg
|
||||
Luca Cherubin
|
||||
|
@@ -296,5 +296,26 @@ title = self._search_regex(
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
|
561
ChangeLog
561
ChangeLog
@@ -1,3 +1,564 @@
|
||||
version 2018.12.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Keep session cookies in cookie file between runs
|
||||
* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
|
||||
|
||||
Extractors
|
||||
+ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
|
||||
+ [aenetworks] Add support for historyvault.com (#18460)
|
||||
* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
|
||||
#17223, #18404)
|
||||
* [iprima] Relax URL regular expression (#18453)
|
||||
* [hotstar] Fix video data extraction (#18386)
|
||||
* [ard:mediathek] Fix title and description extraction (#18349, #18371)
|
||||
* [xvideos] Switch to HTTPS (#18422, #18427)
|
||||
+ [lecturio] Add support for lecturio.com (#18405)
|
||||
+ [nrktv:series] Add support for extra materials
|
||||
* [nrktv:season,series] Fix extraction (#17159, #17258)
|
||||
* [nrktv] Relax URL regular expression (#18304, #18387)
|
||||
* [yourporn] Fix extraction (#18424, #18425)
|
||||
* [tbs] Fix info extraction (#18403)
|
||||
+ [gamespot] Add support for review URLs
|
||||
|
||||
|
||||
version 2018.12.03
|
||||
|
||||
Core
|
||||
* [utils] Fix random_birthday to generate existing dates only (#18284)
|
||||
|
||||
Extractors
|
||||
+ [tiktok] Add support for tiktok.com (#18108, #18135)
|
||||
* [pornhub] Use actual URL host for requests (#18359)
|
||||
* [lynda] Fix authentication (#18158, #18217)
|
||||
* [gfycat] Update API endpoint (#18333, #18343)
|
||||
+ [hotstar] Add support for alternative app state layout (#18320)
|
||||
* [azmedien] Fix extraction (#18334, #18336)
|
||||
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
|
||||
* [joj] Fix extraction (#18280, #18281)
|
||||
+ [wistia] Add support for fast.wistia.com (#18287)
|
||||
|
||||
|
||||
version 2018.11.23
|
||||
|
||||
Core
|
||||
+ [setup.py] Add more relevant classifiers
|
||||
|
||||
Extractors
|
||||
* [mixcloud] Fallback to hardcoded decryption key (#18016)
|
||||
* [nbc:news] Fix article extraction (#16194)
|
||||
* [foxsports] Fix extraction (#17543)
|
||||
* [loc] Relax regular expression and improve formats extraction
|
||||
+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
|
||||
* [nzz] Relax kaltura regex (#18228)
|
||||
* [sixplay] Fix formats extraction
|
||||
* [bitchute] Improve title extraction
|
||||
* [kaltura] Limit requested MediaEntry fields
|
||||
+ [americastestkitchen] Add support for zype embeds (#18225)
|
||||
+ [pornhub] Add pornhub.net alias
|
||||
* [nova:embed] Fix extraction (#18222)
|
||||
|
||||
|
||||
version 2018.11.18
|
||||
|
||||
Extractors
|
||||
+ [wwe] Extract subtitles
|
||||
+ [wwe] Add support for playlists (#14781)
|
||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||
* [vk] Detect geo restriction (#17767)
|
||||
* [openload] Use original host during extraction (#18211)
|
||||
* [atvat] Fix extraction (#18041)
|
||||
+ [rte] Add support for new API endpoint (#18206)
|
||||
* [tnaflixnetwork:embed] Fix extraction (#18205)
|
||||
* [picarto] Use API and add token support (#16518)
|
||||
+ [zype] Add support for player.zype.com (#18143)
|
||||
* [vivo] Fix extraction (#18139)
|
||||
* [ruutu] Update API endpoint (#18138)
|
||||
|
||||
|
||||
version 2018.11.07
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add another JS signature function name regex (#18091, #18093,
|
||||
#18094)
|
||||
* [facebook] Fix tahoe request (#17171)
|
||||
* [cliphunter] Fix extraction (#18083)
|
||||
+ [youtube:playlist] Add support for invidio.us (#18077)
|
||||
* [zattoo] Arrange API hosts for derived extractors (#18035)
|
||||
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
|
||||
|
||||
|
||||
version 2018.11.03
|
||||
|
||||
Core
|
||||
* [extractor/common] Ensure response handle is not prematurely closed before
|
||||
it can be read if it matches expected_status (#17195, #17846, #17447)
|
||||
|
||||
Extractors
|
||||
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
|
||||
+ [ehftv] Add support for ehftv.com (#15408)
|
||||
* [azmedien] Adapt to major site redesign (#17745, #17746)
|
||||
+ [twitcasting] Add support for twitcasting.tv (#17981)
|
||||
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
|
||||
+ [openload] Add support for oload.fun (#18045)
|
||||
* [njpwworld] Fix authentication (#17427)
|
||||
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
|
||||
* [theplatform] Improve error detection (#13222)
|
||||
* [cnbc] Simplify extraction (#14280, #17110)
|
||||
+ [cnbc] Add support for new URL schema (#14193)
|
||||
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
|
||||
* [aparat] Fix extraction
|
||||
|
||||
|
||||
version 2018.10.29
|
||||
|
||||
Core
|
||||
+ [extractor/common] Add validation for JSON-LD URLs
|
||||
|
||||
Extractors
|
||||
+ [sportbox] Add support for matchtv.ru
|
||||
* [sportbox] Fix extraction (#17978)
|
||||
* [screencast] Fix extraction (#14590, #14617, #17990)
|
||||
+ [openload] Add support for oload.icu
|
||||
+ [ivi] Add support for ivi.tv
|
||||
* [crunchyroll] Improve extraction failsafeness (#17991)
|
||||
* [dailymail] Fix formats extraction (#17976)
|
||||
* [viewster] Reduce format requests
|
||||
* [cwtv] Handle API errors (#17905)
|
||||
+ [rutube] Use geo verification headers (#17897)
|
||||
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
|
||||
- [tv3] Remove extractor (#10461, #15339)
|
||||
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
|
||||
+ [openload] Add support for oload.cc (#17823)
|
||||
+ [patreon] Extract post_file URL (#17792)
|
||||
* [patreon] Fix extraction (#14502, #10471)
|
||||
|
||||
|
||||
version 2018.10.05
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Improve authentication (#17762)
|
||||
* [dailymotion] Fix extraction (#17699)
|
||||
* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
|
||||
+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
|
||||
* [philharmoniedeparis] Fix extraction (#17705)
|
||||
+ [jamendo] Add support for licensing.jamendo.com (#17724)
|
||||
+ [openload] Add support for oload.cloud (#17710)
|
||||
* [pluralsight] Fix subtitles extraction (#17726, #17728)
|
||||
+ [vimeo] Add another config regular expression (#17690)
|
||||
* [spike] Fix Paramount Network extraction (#17677)
|
||||
* [hotstar] Fix extraction (#14694, #14931, #17637)
|
||||
|
||||
|
||||
version 2018.09.26
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Fix subtitles extraction (#17671)
|
||||
* [mediaset] Improve embed support (#17668)
|
||||
+ [youtube] Add support for invidio.us (#17613)
|
||||
+ [zattoo] Add support for more zattoo platform sites
|
||||
* [zattoo] Fix extraction (#17175, #17542)
|
||||
|
||||
|
||||
version 2018.09.18
|
||||
|
||||
Core
|
||||
+ [extractor/common] Introduce channel meta fields
|
||||
|
||||
Extractors
|
||||
* [adobepass] Don't pollute default headers dict
|
||||
* [udemy] Don't pollute default headers dict
|
||||
* [twitch] Don't pollute default headers dict
|
||||
* [youtube] Don't pollute default query dict (#17593)
|
||||
* [crunchyroll] Prefer hardsubless formats and formats in locale language
|
||||
* [vrv] Make format ids deterministic
|
||||
* [vimeo] Fix ondemand playlist extraction (#14591)
|
||||
+ [pornhub] Extract upload date (#17574)
|
||||
+ [porntube] Extract channel meta fields
|
||||
+ [vimeo] Extract channel meta fields
|
||||
+ [youtube] Extract channel meta fields (#9676, #12939)
|
||||
* [porntube] Fix extraction (#17541)
|
||||
* [asiancrush] Fix extraction (#15630)
|
||||
+ [twitch:clips] Extend URL regular expression (closes #17559)
|
||||
+ [vzaar] Add support for HLS
|
||||
* [tube8] Fix metadata extraction (#17520)
|
||||
* [eporner] Extract JSON-LD (#17519)
|
||||
|
||||
|
||||
version 2018.09.10
|
||||
|
||||
Core
|
||||
+ [utils] Properly recognize AV1 codec (#17506)
|
||||
|
||||
Extractors
|
||||
+ [iprima] Add support for prima.iprima.cz (#17514)
|
||||
+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
|
||||
* [nbc] Fix extraction of percent encoded URLs (#17374)
|
||||
|
||||
|
||||
version 2018.09.08
|
||||
|
||||
Extractors
|
||||
* [youtube] Fix extraction (#17457, #17464)
|
||||
+ [pornhub:uservideos] Add support for new URLs (#17388)
|
||||
* [iprima] Confirm adult check (#17437)
|
||||
* [slideslive] Make check for video service name case-insensitive (#17429)
|
||||
* [radiojavan] Fix extraction (#17151)
|
||||
* [generic] Skip unsuccessful jwplayer extraction (#16735)
|
||||
|
||||
|
||||
version 2018.09.01
|
||||
|
||||
Core
|
||||
* [utils] Skip remote IP addresses non matching to source address' IP version
|
||||
when creating a connection (#13422, #17362)
|
||||
|
||||
Extractors
|
||||
+ [ard] Add support for one.ard.de (#17397)
|
||||
* [niconico] Fix extraction on python3 (#17393, #17407)
|
||||
* [ard] Extract f4m formats
|
||||
* [crunchyroll] Parse vilos media data (#17343)
|
||||
+ [ard] Add support for Beta ARD Mediathek
|
||||
+ [bandcamp] Extract more metadata (#13197)
|
||||
* [internazionale] Fix extraction of non-available-abroad videos (#17386)
|
||||
|
||||
|
||||
version 2018.08.28
|
||||
|
||||
Extractors
|
||||
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
|
||||
(#17361)
|
||||
* [bitchute] Fix extraction by passing a custom User-Agent (#17360)
|
||||
* [webofstories:playlist] Fix extraction (#16914)
|
||||
+ [tvplayhome] Add support for new tvplay URLs (#17344)
|
||||
+ [generic] Allow relative src for videojs embeds (#17324)
|
||||
+ [xfileshare] Add support for vidto.se (#17317)
|
||||
+ [vidzi] Add support for vidzi.nu (#17316)
|
||||
+ [nova:embed] Add support for media.cms.nova.cz (#17282)
|
||||
|
||||
|
||||
version 2018.08.22
|
||||
|
||||
Core
|
||||
* [utils] Use pure browser header for User-Agent (#17236)
|
||||
|
||||
Extractors
|
||||
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
|
||||
+ [yourporn] Add support for yourporn.sexy (#17298)
|
||||
+ [go] Add support for disneynow.go.com (#16299, #17264)
|
||||
+ [6play] Add support for play.rtl.hr (#17249)
|
||||
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
|
||||
(#16788, #17254)
|
||||
* [lci] Fix extraction (#17274)
|
||||
* [bbccouk] Extend id URL regular expression (#17270)
|
||||
* [cwtv] Fix extraction (#17256)
|
||||
* [nova] Fix extraction (#17241)
|
||||
+ [generic] Add support for expressen embeds
|
||||
* [raywenderlich] Adapt to site redesign (#17225)
|
||||
+ [redbulltv] Add support for redbull.com tv URLs (#17218)
|
||||
+ [bitchute] Add support for bitchute.com (#14052)
|
||||
+ [clyp] Add support for token protected media (#17184)
|
||||
* [imdb] Fix extension extraction (#17167)
|
||||
|
||||
|
||||
version 2018.08.04
|
||||
|
||||
Extractors
|
||||
* [funk:channel] Improve byChannelAlias extraction (#17142)
|
||||
* [twitch] Fix authentication (#17024, #17126)
|
||||
* [twitch:vod] Improve URL regular expression (#17135)
|
||||
* [watchbox] Fix extraction (#17107)
|
||||
* [pbs] Fix extraction (#17109)
|
||||
* [theplatform] Relax URL regular expression (#16181, #17097)
|
||||
+ [viqeo] Add support for viqeo.tv (#17066)
|
||||
|
||||
|
||||
version 2018.07.29
|
||||
|
||||
Extractors
|
||||
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
|
||||
+ [pornhub] Add support for subtitles (#16924, #17088)
|
||||
* [ceskatelevize] Use https for API call (#16997, #16999)
|
||||
* [dailymotion:playlist] Fix extraction (#16894)
|
||||
* [ted] Improve extraction
|
||||
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
|
||||
* [telecinco] Fix extraction (#17080)
|
||||
* [mitele] Reduce number of requests
|
||||
* [rai] Return non HTTP relinker URL intact (#17055)
|
||||
* [vk] Fix extraction for inline only videos (#16923)
|
||||
* [streamcloud] Fix extraction (#17054)
|
||||
* [facebook] Fix tahoe player extraction with authentication (#16655)
|
||||
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
|
||||
|
||||
|
||||
version 2018.07.21
|
||||
|
||||
Core
|
||||
+ [utils] Introduce url_or_none
|
||||
* [utils] Allow JSONP without function name (#17028)
|
||||
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
|
||||
|
||||
Extractors
|
||||
+ [bbc] Add support for BBC Radio Play pages (#17022)
|
||||
* [iwara] Fix download URLs (#17026)
|
||||
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
|
||||
+ [viu] Pass Referer and Origin headers and area id (#16992)
|
||||
+ [vimeo] Add another config regular expression (#17013)
|
||||
+ [facebook] Extract view count (#16942)
|
||||
* [dailymotion] Improve description extraction (#16984)
|
||||
* [slutload] Fix and improve extraction (#17001)
|
||||
* [mediaset] Fix extraction (#16977)
|
||||
+ [theplatform] Add support for theplatform TLD customization (#16977)
|
||||
* [imgur] Relax URL regular expression (#16987)
|
||||
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
|
||||
#16959)
|
||||
|
||||
|
||||
version 2018.07.10
|
||||
|
||||
Core
|
||||
* [utils] Share JSON-LD regular expression
|
||||
* [downloader/dash] Improve error handling (#16927)
|
||||
|
||||
Extractors
|
||||
+ [nrktv] Add support for new season and series URL schema
|
||||
+ [nrktv] Add support for new episode URL schema (#16909)
|
||||
+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
|
||||
* [funk] Fix extraction (#16918)
|
||||
* [watchbox] Fix extraction (#16904)
|
||||
* [dplayit] Sort formats
|
||||
* [dplayit] Fix extraction (#16901)
|
||||
* [youtube] Improve login error handling (#13822)
|
||||
|
||||
|
||||
version 2018.07.04
|
||||
|
||||
Core
|
||||
* [extractor/common] Properly escape % in MPD templates (#16867)
|
||||
* [extractor/common] Use source URL as Referer for HTML5 entries (#16849)
|
||||
* Prefer ffmpeg over avconv by default (#8622)
|
||||
|
||||
Extractors
|
||||
* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
|
||||
* [lynda] Simplify login and improve error capturing (#16891)
|
||||
+ [go90] Add support for embed URLs (#16873)
|
||||
* [go90] Detect geo restriction error and pass geo verification headers
|
||||
(#16874)
|
||||
* [vlive] Fix live streams extraction (#16871)
|
||||
* [npo] Fix typo (#16872)
|
||||
+ [mediaset] Add support for new videos and extract all formats (#16568)
|
||||
* [dctptv] Restore extraction based on REST API (#16850)
|
||||
* [svt] Improve extraction and add support for pages (#16802)
|
||||
* [porncom] Fix extraction (#16808)
|
||||
|
||||
|
||||
version 2018.06.25
|
||||
|
||||
Extractors
|
||||
* [joj] Relax URL regular expression (#16771)
|
||||
* [brightcove] Workaround sonyliv DRM protected videos (#16807)
|
||||
* [motherless] Fix extraction (#16786)
|
||||
* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
|
||||
- [foxnews:insider] Remove extractor (#15810)
|
||||
+ [foxnews] Add support for iframe embeds (#15810, #16711)
|
||||
|
||||
|
||||
version 2018.06.19
|
||||
|
||||
Core
|
||||
+ [extractor/common] Introduce expected_status in _download_* methods
|
||||
for convenient acceptance of HTTP requests that failed with non-2xx status codes
|
||||
+ [compat] Introduce compat_integer_types
|
||||
|
||||
Extractors
|
||||
* [peertube] Improve generic support (#16733)
|
||||
+ [6play] Use geo verification headers
|
||||
* [rtbf] Fix extraction for python 3.2
|
||||
* [vgtv] Improve HLS formats extraction
|
||||
+ [vgtv] Add support for www.aftonbladet.se/tv URLs
|
||||
* [bbccouk] Use expected_status
|
||||
* [markiza] Expect 500 HTTP status code
|
||||
* [tvnow] Try all clear manifest URLs (#15361)
|
||||
|
||||
|
||||
version 2018.06.18
|
||||
|
||||
Core
|
||||
* [downloader/rtmp] Fix downloading in verbose mode (#16736)
|
||||
|
||||
Extractors
|
||||
+ [markiza] Add support for markiza.sk (#16750)
|
||||
* [wat] Try all supported adaptive URLs
|
||||
+ [6play] Add support for rtlplay.be and extract hd usp formats
|
||||
+ [rtbf] Add support for audio and live streams (#9638, #11923)
|
||||
+ [rtbf] Extract HLS, DASH and all HTTP formats
|
||||
+ [rtbf] Extract subtitles
|
||||
+ [rtbf] Fixup specific HTTP URLs (#16101)
|
||||
+ [expressen] Add support for expressen.se
|
||||
* [vidzi] Fix extraction (#16678)
|
||||
* [pbs] Improve extraction (#16623, #16684)
|
||||
* [bilibili] Restrict cid regular expression (#16638, #16734)
|
||||
|
||||
|
||||
version 2018.06.14
|
||||
|
||||
Core
|
||||
* [downloader/http] Fix retry on error when streaming to stdout (#16699)
|
||||
|
||||
Extractors
|
||||
+ [discoverynetworks] Add support for disco-api videos (#16724)
|
||||
+ [dailymotion] Add support for password protected videos (#9789)
|
||||
+ [abc:iview] Add support for livestreams (#12354)
|
||||
* [abc:iview] Fix extraction (#16704)
|
||||
+ [crackle] Add support for sonycrackle.com (#16698)
|
||||
+ [tvnet] Add support for tvnet.gov.vn (#15462)
|
||||
* [nrk] Update API hosts and try all previously known ones (#16690)
|
||||
* [wimp] Fix Youtube embeds extraction
|
||||
|
||||
|
||||
version 2018.06.11
|
||||
|
||||
Extractors
|
||||
* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
|
||||
+ [inc] Add support for another embed schema (#16666)
|
||||
* [tv4] Fix format extraction (#16650)
|
||||
+ [nexx] Add support for free cdn (#16538)
|
||||
+ [pbs] Add another cove id pattern (#15373)
|
||||
+ [rbmaradio] Add support for 192k format (#16631)
|
||||
|
||||
|
||||
version 2018.06.04
|
||||
|
||||
Extractors
|
||||
+ [camtube] Add support for camtube.co
|
||||
+ [twitter:card] Extract guest token (#16609)
|
||||
+ [chaturbate] Use geo verification headers
|
||||
+ [bbc] Add support for bbcthree (#16612)
|
||||
* [youtube] Move metadata extraction after video availability check
|
||||
+ [youtube] Extract track and artist
|
||||
+ [safari] Add support for new URL schema (#16614)
|
||||
* [adn] Fix extraction
|
||||
|
||||
|
||||
version 2018.06.02
|
||||
|
||||
Core
|
||||
* [utils] Improve determine_ext
|
||||
|
||||
Extractors
|
||||
+ [facebook] Add support for tahoe player videos (#15441, #16554)
|
||||
* [cbc] Improve extraction (#16583, #16593)
|
||||
* [openload] Improve ext extraction (#16595)
|
||||
+ [twitter:card] Add support for another endpoint (#16586)
|
||||
+ [openload] Add support for oload.win and oload.download (#16592)
|
||||
* [audimedia] Fix extraction (#15309)
|
||||
+ [francetv] Add support for sport.francetvinfo.fr (#15645)
|
||||
* [mlb] Improve extraction (#16587)
|
||||
- [nhl] Remove old extractors
|
||||
* [rbmaradio] Check formats availability (#16585)
|
||||
|
||||
|
||||
version 2018.05.30
|
||||
|
||||
Core
|
||||
* [downloader/rtmp] Generalize download messages and report time elapsed
|
||||
on finish
|
||||
* [downloader/rtmp] Gracefully handle live streams interrupted by user
|
||||
|
||||
Extractors
|
||||
* [teamcoco] Fix extraction for full episodes (#16573)
|
||||
* [spiegel] Fix info extraction (#16538)
|
||||
+ [apa] Add support for apa.at (#15041, #15672)
|
||||
+ [bellmedia] Add support for bnnbloomberg.ca (#16560)
|
||||
+ [9c9media] Extract MPD formats and subtitles
|
||||
* [cammodels] Use geo verification headers
|
||||
+ [ufctv] Add support for authentication (#16542)
|
||||
+ [cammodels] Add support for cammodels.com (#14499)
|
||||
* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
|
||||
(#16551)
|
||||
* [soundcloud] Detect format extension (#16549)
|
||||
* [cbc] Fix playlist title extraction (#16502)
|
||||
+ [tumblr] Detect and report sensitive media (#13829)
|
||||
+ [tumblr] Add support for authentication (#15133)
|
||||
|
||||
|
||||
version 2018.05.26
|
||||
|
||||
Core
|
||||
* [utils] Improve parse_age_limit
|
||||
|
||||
Extractors
|
||||
* [audiomack] Stringify video id (#15310)
|
||||
* [izlesene] Fix extraction (#16233, #16271, #16407)
|
||||
+ [indavideo] Add support for generic embeds (#11989)
|
||||
* [indavideo] Fix extraction (#11221)
|
||||
* [indavideo] Sign download URLs (#16174)
|
||||
+ [peertube] Add support for PeerTube based sites (#16301, #16329)
|
||||
* [imgur] Fix extraction (#16537)
|
||||
+ [hidive] Add support for authentication (#16534)
|
||||
+ [nbc] Add support for stream.nbcsports.com (#13911)
|
||||
+ [viewlift] Add support for hoichoi.tv (#16536)
|
||||
* [go90] Extract age limit and detect DRM protection (#10127)
|
||||
* [viewlift] Fix extraction for snagfilms.com (#15766)
|
||||
* [globo] Improve extraction (#4189)
|
||||
* Add support for authentication
|
||||
* Simplify URL signing
|
||||
* Extract DASH and MSS formats
|
||||
* [leeco] Fix extraction (#16464)
|
||||
* [teamcoco] Add fallback for format extraction (#16484)
|
||||
* [teamcoco] Improve URL regular expression (#16484)
|
||||
* [imdb] Improve extraction (#4085, #14557)
|
||||
|
||||
|
||||
version 2018.05.18
|
||||
|
||||
Extractors
|
||||
* [vimeo:likes] Relax URL regular expression and fix single page likes
|
||||
extraction (#16475)
|
||||
* [pluralsight] Fix clip id extraction (#16460)
|
||||
+ [mychannels] Add support for mychannels.com (#15334)
|
||||
- [moniker] Remove extractor (#15336)
|
||||
* [pbs] Fix embed data extraction (#16474)
|
||||
+ [mtv] Add support for paramountnetwork.com and bellator.com (#15418)
|
||||
* [youtube] Fix hd720 format position
|
||||
* [dailymotion] Remove fragment part from m3u8 URLs (#8915)
|
||||
* [3sat] Improve extraction (#15350)
|
||||
* Extract all formats
|
||||
* Extract more format metadata
|
||||
* Improve format sorting
|
||||
* Use hls native downloader
|
||||
* Detect and bypass geo-restriction
|
||||
+ [dtube] Add support for d.tube (#15201)
|
||||
* [options] Fix typo (#16450)
|
||||
* [youtube] Improve format filesize extraction (#16453)
|
||||
* [youtube] Make uploader extraction non fatal (#16444)
|
||||
* [youtube] Fix extraction for embed restricted live streams (#16433)
|
||||
* [nbc] Improve info extraction (#16440)
|
||||
* [twitch:clips] Fix extraction (#16429)
|
||||
* [redditr] Relax URL regular expression (#16426, #16427)
|
||||
* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424)
|
||||
+ [nick] Add support for nickjr.de (#13230)
|
||||
* [teamcoco] Fix extraction (#16374)
|
||||
|
||||
|
||||
version 2018.05.09
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Ensure ext exists for automatic captions
|
||||
* Introduce --geo-bypass-ip-block
|
||||
|
||||
Extractors
|
||||
+ [udemy] Extract asset captions
|
||||
+ [udemy] Extract stream URLs (#16372)
|
||||
+ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389)
|
||||
+ [cloudflarestream] Add support for cloudflarestream.com (#16375)
|
||||
* [watchbox] Fix extraction (#16356)
|
||||
* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954)
|
||||
+ [itv:btcc] Add support for itv.com/btcc (#16139)
|
||||
* [tunein] Use live title for live streams (#16347)
|
||||
* [itv] Improve extraction (#16253)
|
||||
|
||||
|
||||
version 2018.05.01
|
||||
|
||||
Core
|
||||
|
53
README.md
53
README.md
@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
|
||||
# INSTALLATION
|
||||
|
||||
To install it right away for all UNIX users (Linux, OS X, etc.), type:
|
||||
To install it right away for all UNIX users (Linux, macOS, etc.), type:
|
||||
|
||||
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
|
||||
sudo chmod a+rx /usr/local/bin/youtube-dl
|
||||
@@ -35,7 +35,7 @@ You can also use pip:
|
||||
|
||||
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
|
||||
|
||||
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
|
||||
|
||||
brew install youtube-dl
|
||||
|
||||
@@ -93,8 +93,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy.
|
||||
To enable experimental SOCKS proxy, specify
|
||||
a proper scheme. For example
|
||||
To enable SOCKS proxy, specify a proper
|
||||
scheme. For example
|
||||
socks5://127.0.0.1:1080/. Pass in an empty
|
||||
string (--proxy "") for direct connection
|
||||
--socket-timeout SECONDS Time to wait before giving up, in seconds
|
||||
@@ -106,16 +106,18 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--geo-verification-proxy URL Use this proxy to verify the IP address for
|
||||
some geo-restricted sites. The default
|
||||
proxy specified by --proxy (or none, if the
|
||||
options is not present) is used for the
|
||||
option is not present) is used for the
|
||||
actual downloading.
|
||||
--geo-bypass Bypass geographic restriction via faking
|
||||
X-Forwarded-For HTTP header (experimental)
|
||||
X-Forwarded-For HTTP header
|
||||
--no-geo-bypass Do not bypass geographic restriction via
|
||||
faking X-Forwarded-For HTTP header
|
||||
(experimental)
|
||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||
explicitly provided two-letter ISO 3166-1
|
||||
country code (experimental)
|
||||
country code
|
||||
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
|
||||
explicitly provided IP block in CIDR
|
||||
notation
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
@@ -206,7 +208,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--playlist-random Download playlist videos in random order
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with
|
||||
expected file size (experimental)
|
||||
expected file size
|
||||
--hls-prefer-native Use the native HLS downloader instead of
|
||||
ffmpeg
|
||||
--hls-prefer-ffmpeg Use ffmpeg instead of the native HLS
|
||||
@@ -425,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
default; fix file if we can, warn
|
||||
otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the
|
||||
postprocessors (default)
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors
|
||||
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
|
||||
postprocessors (default)
|
||||
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
|
||||
either the path to the binary or its
|
||||
containing directory.
|
||||
@@ -440,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
||||
|
||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||
```
|
||||
@@ -509,6 +511,8 @@ The basic usage is not to set any template arguments when downloading a single f
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date (YYYYMMDD)
|
||||
- `uploader_id` (string): Nickname or id of the video uploader
|
||||
- `channel` (string): Full name of the channel the video is uploaded on
|
||||
- `channel_id` (string): Id of the channel
|
||||
- `location` (string): Physical location where the video was filmed
|
||||
- `duration` (numeric): Length of the video in seconds
|
||||
- `view_count` (numeric): How many users have watched the video on the platform
|
||||
@@ -868,7 +872,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
|
||||
|
||||
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
|
||||
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
|
||||
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
|
||||
|
||||
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
|
||||
|
||||
@@ -1164,7 +1168,28 @@ title = self._search_regex(
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
# EMBEDDING YOUTUBE-DL
|
||||
|
||||
|
@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
|
||||
for fn in glob.glob('*.html*'):
|
||||
with io.open(fn, encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
|
||||
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
|
||||
if content != newc:
|
||||
tmpFn = fn + '.part'
|
||||
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:
|
||||
|
@@ -15,7 +15,6 @@
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
- **9c9media:stack**
|
||||
- **9gag**
|
||||
- **9now.com.au**
|
||||
- **abc.net.au**
|
||||
@@ -34,7 +33,7 @@
|
||||
- **AdobeTVShow**
|
||||
- **AdobeTVVideo**
|
||||
- **AdultSwim**
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
|
||||
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
|
||||
- **afreecatv**: afreecatv.com
|
||||
- **AirMozilla**
|
||||
- **AliExpressLive**
|
||||
@@ -48,6 +47,7 @@
|
||||
- **anitube.se**
|
||||
- **Anvato**
|
||||
- **AnySex**
|
||||
- **APA**
|
||||
- **Aparat**
|
||||
- **AppleConnect**
|
||||
- **AppleDaily**: 臺灣蘋果日報
|
||||
@@ -56,6 +56,7 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
@@ -83,8 +84,6 @@
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@@ -97,9 +96,11 @@
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BBVTV**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
- **Bellator**
|
||||
- **BellMedia**
|
||||
- **Bet**
|
||||
- **Bigflix**
|
||||
@@ -107,6 +108,8 @@
|
||||
- **BiliBili**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
@@ -122,10 +125,13 @@
|
||||
- **BRMediathek**: Bayerischer Rundfunk Mediathek
|
||||
- **bt:article**: Bergens Tidende Articles
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BusinessInsider**
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**
|
||||
- **Camdemy**
|
||||
- **CamdemyFolder**
|
||||
- **CamModels**
|
||||
- **CamTube**
|
||||
- **CamWithHer**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||
@@ -157,18 +163,22 @@
|
||||
- **chirbit**
|
||||
- **chirbit:profile**
|
||||
- **Cinchcast**
|
||||
- **CiscoLiveSearch**
|
||||
- **CiscoLiveSession**
|
||||
- **CJSW**
|
||||
- **cliphunter**
|
||||
- **Clippit**
|
||||
- **ClipRs**
|
||||
- **Clipsyndicate**
|
||||
- **CloserToTruth**
|
||||
- **CloudflareStream**
|
||||
- **cloudtime**: CloudTime
|
||||
- **Cloudy**
|
||||
- **Clubic**
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNBC**
|
||||
- **CNBCVideo**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
@@ -184,7 +194,7 @@
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
@@ -232,6 +242,7 @@
|
||||
- **DrTuber**
|
||||
- **drtv**
|
||||
- **drtv:live**
|
||||
- **DTube**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**
|
||||
@@ -241,7 +252,9 @@
|
||||
- **EchoMsk**
|
||||
- **egghead:course**: egghead.io course
|
||||
- **egghead:lesson**: egghead.io lesson
|
||||
- **ehftv**
|
||||
- **eHow**
|
||||
- **EinsUndEinsTV**
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **EllenTube**
|
||||
@@ -259,7 +272,9 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
- **ExtremeTube**
|
||||
- **EyedoTV**
|
||||
- **facebook**
|
||||
@@ -283,7 +298,6 @@
|
||||
- **Foxgay**
|
||||
- **foxnews**: Fox News and Fox Business Video
|
||||
- **foxnews:article**
|
||||
- **foxnews:insider**
|
||||
- **FoxSports**
|
||||
- **france2.fr:generation-what**
|
||||
- **FranceCulture**
|
||||
@@ -296,6 +310,9 @@
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **FreshLive**
|
||||
- **FrontendMasters**
|
||||
- **FrontendMastersCourse**
|
||||
- **FrontendMastersLesson**
|
||||
- **Funimation**
|
||||
- **FunkChannel**
|
||||
- **FunkMix**
|
||||
@@ -315,6 +332,7 @@
|
||||
- **Gfycat**
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
- **GlattvisionTV**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
@@ -344,7 +362,7 @@
|
||||
- **HitRecord**
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **HotStar**
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
@@ -358,10 +376,10 @@
|
||||
- **imdb**: Internet Movie Database trailers
|
||||
- **imdb:list**: Internet Movie Database lists
|
||||
- **Imgur**
|
||||
- **ImgurAlbum**
|
||||
- **imgur:album**
|
||||
- **imgur:gallery**
|
||||
- **Ina**
|
||||
- **Inc**
|
||||
- **Indavideo**
|
||||
- **IndavideoEmbed**
|
||||
- **InfoQ**
|
||||
- **Instagram**
|
||||
@@ -373,6 +391,7 @@
|
||||
- **Ir90Tv**
|
||||
- **ITTF**
|
||||
- **ITV**
|
||||
- **ITVBTCC**
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
@@ -396,6 +415,7 @@
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KinoPoisk**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
@@ -416,6 +436,8 @@
|
||||
- **Le**: 乐视网
|
||||
- **Learnr**
|
||||
- **Lecture2Go**
|
||||
- **Lecturio**
|
||||
- **LecturioCourse**
|
||||
- **LEGO**
|
||||
- **Lemonde**
|
||||
- **Lenta**
|
||||
@@ -428,6 +450,8 @@
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@@ -445,11 +469,12 @@
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
- **mailru:music:search**: Музыка@Mail.Ru
|
||||
- **MakersChannel**
|
||||
- **MakerTV**
|
||||
- **mangomolo:live**
|
||||
- **mangomolo:video**
|
||||
- **ManyVids**
|
||||
- **Markiza**
|
||||
- **MarkizaPage**
|
||||
- **massengeschmack.tv**
|
||||
- **MatchTV**
|
||||
- **MDR**: MDR.DE and KiKA
|
||||
@@ -480,10 +505,10 @@
|
||||
- **Mixer:vod**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MNetTV**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
- **Mojvideo**
|
||||
- **Moniker**: allmyvideos.net and vidspot.net
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
- **MotherlessGroup**
|
||||
@@ -505,12 +530,14 @@
|
||||
- **mva:course**: Microsoft Virtual Academy courses
|
||||
- **Mwave**
|
||||
- **MwaveMeetGreet**
|
||||
- **MyChannels**
|
||||
- **MySpace**
|
||||
- **MySpace:album**
|
||||
- **MySpass**
|
||||
- **Myvi**
|
||||
- **MyVidster**
|
||||
- **MyviEmbed**
|
||||
- **MyVisionTV**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
@@ -522,6 +549,7 @@
|
||||
- **nbcolympics**
|
||||
- **nbcolympics:stream**
|
||||
- **NBCSports**
|
||||
- **NBCSportsStream**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
- **ndr:embed**
|
||||
@@ -535,6 +563,7 @@
|
||||
- **netease:program**: 网易云音乐 - 电台节目
|
||||
- **netease:singer**: 网易云音乐 - 歌手
|
||||
- **netease:song**: 网易云音乐
|
||||
- **NetPlus**
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **NewgroundsPlaylist**
|
||||
@@ -548,9 +577,6 @@
|
||||
- **nfl.com**
|
||||
- **NhkVod**
|
||||
- **nhl.com**
|
||||
- **nhl.com:news**: NHL news
|
||||
- **nhl.com:videocenter**
|
||||
- **nhl.com:videocenter:category**: NHL videocenter category
|
||||
- **nick.com**
|
||||
- **nick.de**
|
||||
- **nickelodeon:br**
|
||||
@@ -569,6 +595,7 @@
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **NovaEmbed**
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
@@ -584,7 +611,9 @@
|
||||
- **NRKSkole**: NRK Skole
|
||||
- **NRKTV**: NRK TV and NRK Radio
|
||||
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
|
||||
- **NRKTVEpisode**
|
||||
- **NRKTVEpisodes**
|
||||
- **NRKTVSeason**
|
||||
- **NRKTVSeries**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@@ -611,15 +640,18 @@
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
- **pandora.tv**: 판도라TV
|
||||
- **ParamountNetwork**
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
- **pcmag**
|
||||
- **PearVideo**
|
||||
- **PeerTube**
|
||||
- **People**
|
||||
- **PerformGroup**
|
||||
- **periscope**: Periscope
|
||||
@@ -660,6 +692,8 @@
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **puhutv**
|
||||
- **puhutv:serie**
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **qqmusic**: QQ音乐
|
||||
@@ -667,6 +701,7 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**
|
||||
- **Quickline**
|
||||
- **QuicklineLive**
|
||||
- **R7**
|
||||
@@ -682,6 +717,7 @@
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RayWenderlichCourse**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@@ -733,6 +769,7 @@
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **SAKTV**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@@ -786,9 +823,9 @@
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
- **Spiegeltv**
|
||||
- **Spike**
|
||||
- **sport.francetvinfo.fr**
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
@@ -806,6 +843,7 @@
|
||||
- **StretchInternet**
|
||||
- **SunPorno**
|
||||
- **SVT**
|
||||
- **SVTPage**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **SWRMediathek**
|
||||
@@ -818,6 +856,8 @@
|
||||
- **TastyTrade**
|
||||
- **TBS**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**
|
||||
- **TeachableCourse**
|
||||
- **teachertube**: teachertube.com videos
|
||||
- **teachertube:user:collection**: teachertube.com user and collection videos
|
||||
- **TeachingChannel**
|
||||
@@ -826,6 +866,7 @@
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **Tele13**
|
||||
- **Tele5**
|
||||
- **TeleBruxelles**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
@@ -849,6 +890,8 @@
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
- **ThisOldHouse**
|
||||
- **TikTok**
|
||||
- **TikTokUser**
|
||||
- **tinypic**: tinypic.com videos
|
||||
- **TMZ**
|
||||
- **TMZArticle**
|
||||
@@ -877,7 +920,6 @@
|
||||
- **TV2**
|
||||
- **tv2.hu**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TVA**
|
||||
@@ -888,6 +930,7 @@
|
||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||
- **tvland.com**
|
||||
- **TVN24**
|
||||
- **TVNet**
|
||||
- **TVNoe**
|
||||
- **TVNow**
|
||||
- **TVNowList**
|
||||
@@ -896,7 +939,9 @@
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlayer**
|
||||
- **TVPlayHome**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:profile**
|
||||
@@ -921,8 +966,6 @@
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
- **uplynk:preplay**
|
||||
- **Upskill**
|
||||
- **UpskillCourse**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **URPlay**
|
||||
- **USANetwork**
|
||||
@@ -941,6 +984,7 @@
|
||||
- **VevoPlaylist**
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **vhx:embed**
|
||||
- **Viafree**
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
@@ -985,6 +1029,7 @@
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **Viqeo**
|
||||
- **Viu**
|
||||
- **viu:ott**
|
||||
- **viu:playlist**
|
||||
@@ -1010,12 +1055,14 @@
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
- **VShare**
|
||||
- **VTXTV**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Walla**
|
||||
- **WalyTV**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
@@ -1041,6 +1088,7 @@
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
@@ -1076,6 +1124,7 @@
|
||||
- **YouNowLive**
|
||||
- **YouNowMoment**
|
||||
- **YouPorn**
|
||||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
@@ -1099,3 +1148,4 @@
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
@@ -2,5 +2,5 @@
|
||||
universal = True
|
||||
|
||||
[flake8]
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
|
||||
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
|
||||
ignore = E402,E501,E731,E741
|
||||
|
9
setup.py
9
setup.py
@@ -124,6 +124,8 @@ setup(
|
||||
'Development Status :: 5 - Production/Stable',
|
||||
'Environment :: Console',
|
||||
'License :: Public Domain',
|
||||
'Programming Language :: Python',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 2.6',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
@@ -132,6 +134,13 @@ setup(
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Programming Language :: Python :: 3.7',
|
||||
'Programming Language :: Python :: 3.8',
|
||||
'Programming Language :: Python :: Implementation',
|
||||
'Programming Language :: Python :: Implementation :: CPython',
|
||||
'Programming Language :: Python :: Implementation :: IronPython',
|
||||
'Programming Language :: Python :: Implementation :: Jython',
|
||||
'Programming Language :: Python :: Implementation :: PyPy',
|
||||
],
|
||||
|
||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||
|
@@ -7,6 +7,7 @@ import json
|
||||
import os.path
|
||||
import re
|
||||
import types
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
@@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
|
||||
real_warning(w)
|
||||
|
||||
ydl.report_warning = _report_warning
|
||||
|
||||
|
||||
def http_server_port(httpd):
    """Return the port number that the server ``httpd`` is bound to.

    The port is read from ``getsockname()`` of the server's listening
    socket.  On Jython (``os.name == 'java'``) an ``ssl.SSLSocket`` is not a
    subclass of ``socket.socket``, so the socket stored in its ``sock``
    attribute is queried instead.
    """
    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket
        sock = httpd.socket.sock
    else:
        sock = httpd.socket
    return sock.getsockname()[1]
|
||||
|
@@ -9,11 +9,30 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.compat import compat_etree_fromstring
|
||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
import threading
|
||||
|
||||
|
||||
TEAPOT_RESPONSE_STATUS = 418
|
||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||
|
||||
|
||||
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """Minimal HTTP request handler used by the InfoExtractor download tests.

    ``GET /teapot`` is answered with status ``TEAPOT_RESPONSE_STATUS`` and the
    body ``TEAPOT_RESPONSE_BODY``; every other path is answered with 404.
    """

    def log_message(self, format, *args):
        # Suppress the per-request log lines so they do not clutter test output.
        pass

    def do_GET(self):
        if self.path == '/teapot':
            self.send_response(TEAPOT_RESPONSE_STATUS)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
        else:
            # Reply explicitly instead of `assert False`: an assert is removed
            # under `python -O` and otherwise aborts the handler without
            # sending any response to the client.
            self.send_error(404)
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
def test_response_with_expected_status_returns_content(self):
    # Checks for mitigations against the effects of
    # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
    # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
    # or the underlying `_download_webpage_handle` returning no content
    # when a response matches `expected_status`.

    # Port 0 lets the OS pick a free port; the actual one is read back below.
    httpd = compat_http_server.HTTPServer(
        ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
    port = http_server_port(httpd)
    server_thread = threading.Thread(target=httpd.serve_forever)
    server_thread.daemon = True
    server_thread.start()

    try:
        content, _ = self.ie._download_webpage_handle(
            'http://127.0.0.1:%d/teapot' % port, None,
            expected_status=TEAPOT_RESPONSE_STATUS)
        self.assertEqual(content, TEAPOT_RESPONSE_BODY)
    finally:
        # Stop serve_forever() and release the listening socket so the
        # server does not outlive this test, whether it passes or fails.
        httpd.shutdown()
        httpd.server_close()
        server_thread.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
34
test/test_YoutubeDLCookieJar.py
Normal file
34
test/test_YoutubeDLCookieJar.py
Normal file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import YoutubeDLCookieJar
|
||||
|
||||
|
||||
class TestYoutubeDLCookieJar(unittest.TestCase):
    """Round-trip tests for YoutubeDLCookieJar load/save."""

    def test_keep_session_cookies(self):
        """Cookies loaded from the fixture must still be present after save().

        Both fixture cookies are expected in the saved file with an expiry
        field of 0.
        """
        # Locate the fixture relative to this file so the test does not
        # depend on the current working directory.
        cookie_file = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'testdata', 'cookies', 'session_cookies.txt')
        cookiejar = YoutubeDLCookieJar(cookie_file)
        cookiejar.load(ignore_discard=True, ignore_expires=True)
        tf = tempfile.NamedTemporaryFile(delete=False)
        # Only the unique name is needed.  Close our handle before the cookie
        # jar opens the file by name: a NamedTemporaryFile cannot be opened a
        # second time while still open on Windows.
        tf.close()
        try:
            cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
            with open(tf.name, 'rb') as saved:
                temp = saved.read().decode('utf-8')
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
            self.assertTrue(re.search(
                r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
        finally:
            os.remove(tf.name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
|
||||
|
||||
def test_compat_expanduser(self):
|
||||
old_home = os.environ.get('HOME')
|
||||
test_str = 'C:\Documents and Settings\тест\Application Data'
|
||||
test_str = r'C:\Documents and Settings\тест\Application Data'
|
||||
compat_setenv('HOME', test_str)
|
||||
self.assertEqual(compat_expanduser('~'), test_str)
|
||||
compat_setenv('HOME', old_home or '')
|
||||
|
@@ -9,26 +9,16 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import http_server_port, try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
|
@@ -8,6 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import http_server_port
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
@@ -16,15 +17,6 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
|
||||
class TestMetadataFromTitle(unittest.TestCase):
|
||||
def test_format_to_regex(self):
|
||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||
|
@@ -78,6 +78,7 @@ from youtube_dl.utils import (
|
||||
uppercase_escape,
|
||||
lowercase_escape,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
base_url,
|
||||
urljoin,
|
||||
urlencode_postdata,
|
||||
@@ -361,6 +362,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
||||
self.assertEqual(determine_ext('foobar', None), None)
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@@ -506,6 +508,16 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
|
||||
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
|
||||
|
||||
def test_url_or_none(self):
    # Values that are not URLs must yield None.
    self.assertEqual(url_or_none(None), None)
    self.assertEqual(url_or_none(''), None)
    self.assertEqual(url_or_none('foo'), None)
    self.assertEqual(url_or_none('http$://foo.de'), None)
    # Absolute and protocol-relative URLs are returned unchanged.
    self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
    self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
    self.assertEqual(url_or_none('//foo.de'), '//foo.de')
|
||||
|
||||
def test_parse_age_limit(self):
|
||||
self.assertEqual(parse_age_limit(None), None)
|
||||
self.assertEqual(parse_age_limit(False), None)
|
||||
@@ -519,6 +531,8 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(parse_age_limit('PG-13'), 13)
|
||||
self.assertEqual(parse_age_limit('TV-14'), 14)
|
||||
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
||||
self.assertEqual(parse_age_limit('TV14'), 14)
|
||||
self.assertEqual(parse_age_limit('TV_G'), 0)
|
||||
|
||||
def test_parse_duration(self):
|
||||
self.assertEqual(parse_duration(None), None)
|
||||
@@ -714,6 +728,10 @@ class TestUtil(unittest.TestCase):
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
stripped = strip_jsonp('({"status": "success"});')
|
||||
d = json.loads(stripped)
|
||||
self.assertEqual(d, {'status': 'success'})
|
||||
|
||||
def test_uppercase_escape(self):
|
||||
self.assertEqual(uppercase_escape('aä'), 'aä')
|
||||
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
|
||||
@@ -767,6 +785,10 @@ class TestUtil(unittest.TestCase):
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
})
|
||||
self.assertEqual(parse_codecs('av01.0.05M.08'), {
|
||||
'vcodec': 'av01.0.05M.08',
|
||||
'acodec': 'none',
|
||||
})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
|
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
6
test/testdata/cookies/session_cookies.txt
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Netscape HTTP Cookie File
|
||||
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||
# This is a generated file! Do not edit.
|
||||
|
||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
|
||||
www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
|
@@ -88,6 +88,7 @@ from .utils import (
|
||||
version_tuple,
|
||||
write_json_file,
|
||||
write_string,
|
||||
YoutubeDLCookieJar,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
)
|
||||
@@ -211,7 +212,7 @@ class YoutubeDL(object):
|
||||
At the moment, this is only supported by YouTube.
|
||||
proxy: URL of the proxy server to use
|
||||
geo_verification_proxy: URL of the proxy to use for IP address verification
|
||||
on geo-restricted sites. (Experimental)
|
||||
on geo-restricted sites.
|
||||
socket_timeout: Time to wait for unresponsive hosts, in seconds
|
||||
bidi_workaround: Work around buggy terminals without bidirectional text
|
||||
support, using fribidi
|
||||
@@ -259,7 +260,7 @@ class YoutubeDL(object):
|
||||
- "warn": only emit a warning
|
||||
- "detect_or_warn": check whether we can do anything
|
||||
about it, warn otherwise (default)
|
||||
source_address: (Experimental) Client-side IP address to bind to.
|
||||
source_address: Client-side IP address to bind to.
|
||||
call_home: Boolean, true iff we are allowed to contact the
|
||||
youtube-dl servers for debugging.
|
||||
sleep_interval: Number of seconds to sleep before each download when
|
||||
@@ -281,11 +282,14 @@ class YoutubeDL(object):
|
||||
match_filter_func in utils.py is one example for this.
|
||||
no_color: Do not emit color codes in output.
|
||||
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
||||
HTTP header (experimental)
|
||||
HTTP header
|
||||
geo_bypass_country:
|
||||
Two-letter ISO 3166-1 alpha-2 country code that will be used for
|
||||
explicit geographic restriction bypassing via faking
|
||||
X-Forwarded-For HTTP header (experimental)
|
||||
X-Forwarded-For HTTP header
|
||||
geo_bypass_ip_block:
|
||||
IP range in CIDR notation that will be used similarly to
|
||||
geo_bypass_country
|
||||
|
||||
The following options determine which downloader is picked:
|
||||
external_downloader: Executable of the external downloader to call.
|
||||
@@ -302,8 +306,8 @@ class YoutubeDL(object):
|
||||
http_chunk_size.
|
||||
|
||||
The following options are used by the post processors:
|
||||
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
|
||||
otherwise prefer avconv.
|
||||
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
|
||||
otherwise prefer ffmpeg.
|
||||
postprocessor_args: A list of additional command-line arguments for the
|
||||
postprocessor.
|
||||
|
||||
@@ -555,7 +559,7 @@ class YoutubeDL(object):
|
||||
self.restore_console_title()
|
||||
|
||||
if self.params.get('cookiefile') is not None:
|
||||
self.cookiejar.save()
|
||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
def trouble(self, message=None, tb=None):
|
||||
"""Determine action to take when a download problem appears.
|
||||
@@ -1479,23 +1483,28 @@ class YoutubeDL(object):
|
||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
|
||||
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||
cc = info_dict.get(cc_kind)
|
||||
if cc:
|
||||
for _, subtitle in cc.items():
|
||||
for subtitle_format in subtitle:
|
||||
if subtitle_format.get('url'):
|
||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||
if subtitle_format.get('ext') is None:
|
||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||
|
||||
automatic_captions = info_dict.get('automatic_captions')
|
||||
subtitles = info_dict.get('subtitles')
|
||||
if subtitles:
|
||||
for _, subtitle in subtitles.items():
|
||||
for subtitle_format in subtitle:
|
||||
if subtitle_format.get('url'):
|
||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||
if subtitle_format.get('ext') is None:
|
||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||
|
||||
if self.params.get('listsubtitles', False):
|
||||
if 'automatic_captions' in info_dict:
|
||||
self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
|
||||
self.list_subtitles(
|
||||
info_dict['id'], automatic_captions, 'automatic captions')
|
||||
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
|
||||
return
|
||||
|
||||
info_dict['requested_subtitles'] = self.process_subtitles(
|
||||
info_dict['id'], subtitles,
|
||||
info_dict.get('automatic_captions'))
|
||||
info_dict['id'], subtitles, automatic_captions)
|
||||
|
||||
# We now pick which formats have to be downloaded
|
||||
if info_dict.get('formats') is None:
|
||||
@@ -2289,10 +2298,9 @@ class YoutubeDL(object):
|
||||
self.cookiejar = compat_cookiejar.CookieJar()
|
||||
else:
|
||||
opts_cookiefile = expand_path(opts_cookiefile)
|
||||
self.cookiejar = compat_cookiejar.MozillaCookieJar(
|
||||
opts_cookiefile)
|
||||
self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
|
||||
if os.access(opts_cookiefile, os.R_OK):
|
||||
self.cookiejar.load()
|
||||
self.cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
|
@@ -430,6 +430,7 @@ def _real_main(argv=None):
|
||||
'config_location': opts.config_location,
|
||||
'geo_bypass': opts.geo_bypass,
|
||||
'geo_bypass_country': opts.geo_bypass_country,
|
||||
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
|
||||
# just for deprecation check
|
||||
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
||||
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
||||
|
@@ -2787,6 +2787,12 @@ except NameError: # Python 3
|
||||
compat_numeric_types = (int, float, complex)
|
||||
|
||||
|
||||
try:
|
||||
compat_integer_types = (int, long)
|
||||
except NameError: # Python 3
|
||||
compat_integer_types = (int, )
|
||||
|
||||
|
||||
if sys.version_info < (2, 7):
|
||||
def compat_socket_create_connection(address, timeout, source_address=None):
|
||||
host, port = address
|
||||
@@ -2974,6 +2980,7 @@ __all__ = [
|
||||
'compat_http_client',
|
||||
'compat_http_server',
|
||||
'compat_input',
|
||||
'compat_integer_types',
|
||||
'compat_itertools_count',
|
||||
'compat_kwargs',
|
||||
'compat_numeric_types',
|
||||
|
@@ -45,7 +45,6 @@ class FileDownloader(object):
|
||||
min_filesize: Skip files smaller than this size
|
||||
max_filesize: Skip files larger than this size
|
||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||
(experimental)
|
||||
external_downloader_args: A list of additional command-line arguments for the
|
||||
external downloader.
|
||||
hls_use_mpegts: Use the mpegts container for HLS videos.
|
||||
|
@@ -2,7 +2,10 @@ from __future__ import unicode_literals
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import urljoin
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
except DownloadError:
|
||||
# Don't retry fragment if error occurred during HTTP downloading
|
||||
# itself since it has its own retry settings
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
break
|
||||
raise
|
||||
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(frag_index)
|
||||
|
@@ -217,10 +217,11 @@ class HttpFD(FileDownloader):
|
||||
before = start # start measuring
|
||||
|
||||
def retry(e):
|
||||
if ctx.tmpfilename != '-':
|
||||
to_stdout = ctx.tmpfilename == '-'
|
||||
if not to_stdout:
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
raise RetryDownload(e)
|
||||
|
||||
while True:
|
||||
|
@@ -29,66 +29,68 @@ class RtmpFD(FileDownloader):
|
||||
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
|
||||
cursor_in_new_line = True
|
||||
proc_stderr_closed = False
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = ''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
time_now = time.time()
|
||||
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes_estimate': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
try:
|
||||
while not proc_stderr_closed:
|
||||
# read line from stderr
|
||||
line = ''
|
||||
while True:
|
||||
char = proc.stderr.read(1)
|
||||
if not char:
|
||||
proc_stderr_closed = True
|
||||
break
|
||||
if char in [b'\r', b'\n']:
|
||||
break
|
||||
line += char.decode('ascii', 'replace')
|
||||
if not line:
|
||||
# proc_stderr_closed is True
|
||||
continue
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
percent = float(mobj.group(2))
|
||||
if not resume_percent:
|
||||
resume_percent = percent
|
||||
resume_downloaded_data_len = downloaded_data_len
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
|
||||
data_len = None
|
||||
if percent > 0:
|
||||
data_len = int(downloaded_data_len * 100 / percent)
|
||||
self._hook_progress({
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'total_bytes_estimate': data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'eta': eta,
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen('[rtmpdump] ' + line)
|
||||
proc.wait()
|
||||
else:
|
||||
# no percent for live streams
|
||||
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
|
||||
if mobj:
|
||||
downloaded_data_len = int(float(mobj.group(1)) * 1024)
|
||||
time_now = time.time()
|
||||
speed = self.calc_speed(start, time_now, downloaded_data_len)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': downloaded_data_len,
|
||||
'tmpfilename': tmpfilename,
|
||||
'filename': filename,
|
||||
'status': 'downloading',
|
||||
'elapsed': time_now - start,
|
||||
'speed': speed,
|
||||
})
|
||||
cursor_in_new_line = False
|
||||
elif self.params.get('verbose', False):
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
cursor_in_new_line = True
|
||||
self.to_screen('[rtmpdump] ' + line)
|
||||
finally:
|
||||
proc.wait()
|
||||
if not cursor_in_new_line:
|
||||
self.to_screen('')
|
||||
return proc.returncode
|
||||
@@ -163,7 +165,15 @@ class RtmpFD(FileDownloader):
|
||||
RD_INCOMPLETE = 2
|
||||
RD_NO_CONNECT = 3
|
||||
|
||||
retval = run_rtmpdump(args)
|
||||
started = time.time()
|
||||
|
||||
try:
|
||||
retval = run_rtmpdump(args)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
raise
|
||||
retval = RD_SUCCESS
|
||||
self.to_screen('\n[rtmpdump] Interrupted by user')
|
||||
|
||||
if retval == RD_NO_CONNECT:
|
||||
self.report_error('[rtmpdump] Could not connect to RTMP server.')
|
||||
@@ -171,7 +181,7 @@ class RtmpFD(FileDownloader):
|
||||
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
args = basic_args + ['--resume']
|
||||
if retval == RD_FAILED:
|
||||
@@ -188,13 +198,14 @@ class RtmpFD(FileDownloader):
|
||||
break
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] %s bytes' % fsize)
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
'total_bytes': fsize,
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'elapsed': time.time() - started,
|
||||
})
|
||||
return True
|
||||
else:
|
||||
|
@@ -105,22 +105,22 @@ class ABCIE(InfoExtractor):
|
||||
|
||||
class ABCIViewIE(InfoExtractor):
|
||||
IE_NAME = 'abc.net.au:iview'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
|
||||
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
||||
'info_dict': {
|
||||
'id': 'ZY9247A021S00',
|
||||
'id': 'ZX9371A050S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Gaston's Visit",
|
||||
'title': "Gaston's Birthday",
|
||||
'series': "Ben And Holly's Little Kingdom",
|
||||
'description': 'md5:18db170ad71cf161e006a4c688e33155',
|
||||
'upload_date': '20180318',
|
||||
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
||||
'upload_date': '20180604',
|
||||
'uploader_id': 'abc4kids',
|
||||
'timestamp': 1521400959,
|
||||
'timestamp': 1528140219,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -129,17 +129,16 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_params = self._parse_json(self._search_regex(
|
||||
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
|
||||
title = video_params.get('title') or video_params['seriesTitle']
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
|
||||
video_params = self._download_json(
|
||||
'https://iview.abc.net.au/api/programs/' + video_id, video_id)
|
||||
title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber')
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
sig = hmac.new(
|
||||
'android.content.res.Resources'.encode('utf-8'),
|
||||
b'android.content.res.Resources',
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
@@ -169,18 +168,26 @@ class ABCIViewIE(InfoExtractor):
|
||||
'ext': 'vtt',
|
||||
}]
|
||||
|
||||
is_live = video_params.get('livestream') == '1'
|
||||
if is_live:
|
||||
title = self._live_title(title)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': unescapeHTML(title),
|
||||
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
|
||||
'title': title,
|
||||
'description': video_params.get('description'),
|
||||
'thumbnail': video_params.get('thumbnail'),
|
||||
'duration': int_or_none(video_params.get('eventDuration')),
|
||||
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
|
||||
'series': unescapeHTML(video_params.get('seriesTitle')),
|
||||
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
|
||||
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
|
||||
'episode': self._html_search_meta('episode_title', webpage, default=None),
|
||||
'season_number': int_or_none(self._search_regex(
|
||||
r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
|
||||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
@@ -1,8 +1,11 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt
|
||||
@@ -12,9 +15,12 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
pkcs1pad,
|
||||
srt_subtitles_timecode,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
@@ -35,6 +41,7 @@ class ADNIE(InfoExtractor):
|
||||
}
|
||||
}
|
||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
||||
|
||||
def _get_subtitles(self, sub_path, video_id):
|
||||
if not sub_path:
|
||||
@@ -42,16 +49,14 @@ class ADNIE(InfoExtractor):
|
||||
|
||||
enc_subtitles = self._download_webpage(
|
||||
urljoin(self._BASE_URL, sub_path),
|
||||
video_id, fatal=False, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
|
||||
})
|
||||
video_id, fatal=False)
|
||||
if not enc_subtitles:
|
||||
return None
|
||||
|
||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||
bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
|
||||
bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
|
||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||
))
|
||||
subtitles_json = self._parse_json(
|
||||
@@ -112,11 +117,24 @@ class ADNIE(InfoExtractor):
|
||||
error = None
|
||||
if not links:
|
||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
||||
links_data = self._download_json(urljoin(
|
||||
self._BASE_URL, links_url), video_id)
|
||||
token = options['token']
|
||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
'k': self._K,
|
||||
'e': 60,
|
||||
't': token,
|
||||
}))
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
links_data = self._download_json(
|
||||
urljoin(self._BASE_URL, links_url), video_id, headers={
|
||||
'Authorization': 'Bearer ' + authorization,
|
||||
})
|
||||
links = links_data.get('links') or {}
|
||||
metas = metas or links_data.get('meta') or {}
|
||||
sub_path = sub_path or links_data.get('subtitles')
|
||||
sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
|
||||
error = links_data.get('error')
|
||||
title = metas.get('title') or video_info['title']
|
||||
|
||||
|
@@ -1325,8 +1325,8 @@ class AdobePassIE(InfoExtractor):
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not video_id:
|
||||
entries = []
|
||||
for episode in video_data.get('archiveEpisodes', []):
|
||||
episode_url = episode.get('url')
|
||||
episode_url = url_or_none(episode.get('url'))
|
||||
if not episode_url:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
|
@@ -22,18 +22,19 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||
|
||||
class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:history|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||
fyi\.tv
|
||||
)/
|
||||
(?:
|
||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
||||
specials/(?P<special_display_id>[^/]+)/full-special
|
||||
specials/(?P<special_display_id>[^/]+)/full-special|
|
||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
@@ -80,6 +81,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
||||
'only_matching': True
|
||||
}]
|
||||
_DOMAIN_TO_REQUESTOR_ID = {
|
||||
'history.com': 'HISTORY',
|
||||
@@ -90,9 +94,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
||||
if show_path:
|
||||
url_parts = show_path.split('/')
|
||||
url_parts_len = len(url_parts)
|
||||
|
@@ -9,6 +9,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
)
|
||||
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
|
||||
file_elements = video_element.findall(compat_xpath('./file'))
|
||||
one = len(file_elements) == 1
|
||||
for file_num, file_element in enumerate(file_elements, start=1):
|
||||
file_url = file_element.text
|
||||
file_url = url_or_none(file_element.text)
|
||||
if not file_url:
|
||||
continue
|
||||
key = file_element.get('key', '')
|
||||
|
@@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
@@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
|
||||
zype_id = ep_meta.get('zype_id')
|
||||
if zype_id:
|
||||
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||
ie_key = 'Zype'
|
||||
else:
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||
ie_key = 'Kaltura'
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
@@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'url': embed_url,
|
||||
'ie_key': ie_key,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
|
@@ -3,11 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
subtitle_href = url_or_none(subtitle.get('href'))
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
|
||||
media_content = [media_content]
|
||||
for media_data in media_content:
|
||||
media = media_data.get('@attributes', {})
|
||||
media_url = media.get('url')
|
||||
media_url = url_or_none(media.get('url'))
|
||||
if not media_url:
|
||||
continue
|
||||
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
|
||||
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
|
||||
'url': media['url'],
|
||||
'url': media_url,
|
||||
'tbr': int_or_none(media.get('bitrate')),
|
||||
'filesize': int_or_none(media.get('fileSize')),
|
||||
'ext': ext,
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
@@ -52,7 +53,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||
}, fatal=False)
|
||||
if not playlist:
|
||||
continue
|
||||
stream_url = playlist.get('streamurl')
|
||||
stream_url = url_or_none(playlist.get('streamurl'))
|
||||
if stream_url:
|
||||
rtmp = re.search(
|
||||
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
|
||||
|
@@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
|
||||
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
|
||||
'info_dict': {
|
||||
'id': '4465496',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
|
||||
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
|
||||
'duration': 22,
|
||||
'timestamp': 1534855680,
|
||||
'upload_date': '20180821',
|
||||
'uploader': 'ANV',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
|
||||
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
@@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
||||
'anvts': server_time,
|
||||
},
|
||||
}
|
||||
@@ -277,10 +302,13 @@ class AnvatoIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
for rendition in video_data.get('renditions', []):
|
||||
video_url = rendition.get('url')
|
||||
video_url = url_or_none(rendition.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
ext = rendition.get('format')
|
||||
|
94
youtube_dl/extractor/apa.py
Normal file
94
youtube_dl/extractor/apa.py
Normal file
@@ -0,0 +1,94 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class APAIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||
'info_dict': {
|
||||
'id': 'jjv85FdZ',
|
||||
'ext': 'mp4',
|
||||
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 254,
|
||||
'timestamp': 1519211149,
|
||||
'upload_date': '20180221',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
'jwplatform id', default=None)
|
||||
|
||||
if jwplatform_id:
|
||||
return self.url_result(
|
||||
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||
video_id=video_id)
|
||||
|
||||
sources = self._parse_json(
|
||||
self._search_regex(
|
||||
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
source_url = url_or_none(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
ext = determine_ext(source_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_url,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
@@ -4,66 +4,92 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
'age_limit': 0,
|
||||
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
|
||||
'duration': 231,
|
||||
'timestamp': 1387394859,
|
||||
'upload_date': '20131218',
|
||||
'view_count': int,
|
||||
},
|
||||
# 'skip': 'Extremely unreliable',
|
||||
}
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'https://www.aparat.com/v/8dflw/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
# Provides more metadata
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
file_list = self._parse_json(
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
|
||||
'file list'),
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = item.get('file')
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': label or ext,
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height', default=None)),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
for sources in player['multiSRC']:
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
file_url = url_or_none(item.get('src'))
|
||||
if not file_url:
|
||||
continue
|
||||
item_type = item.get('type')
|
||||
if item_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': 'http-%s' % (label or ext),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height',
|
||||
default=None)),
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return {
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@@ -5,7 +5,6 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .generic import GenericIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
@@ -15,13 +14,14 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_etree_fromstring
|
||||
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
@@ -37,6 +37,9 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@@ -100,7 +103,7 @@ class ARDMediathekIE(InfoExtractor):
|
||||
quality = stream.get('_quality')
|
||||
server = stream.get('_server')
|
||||
for stream_url in stream_urls:
|
||||
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
|
||||
if not url_or_none(stream_url):
|
||||
continue
|
||||
ext = determine_ext(stream_url)
|
||||
if quality != 'auto' and ext in ('f4m', 'm3u8'):
|
||||
@@ -170,13 +173,18 @@ class ARDMediathekIE(InfoExtractor):
|
||||
title = self._html_search_regex(
|
||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||
r'<h4 class="headline">(.*?)</h4>'],
|
||||
r'<h4 class="headline">(.*?)</h4>',
|
||||
r'<title[^>]*>(.*?)</title>'],
|
||||
webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'dcterms.abstract', webpage, 'description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'meta description')
|
||||
'description', webpage, 'meta description', default=None)
|
||||
if description is None:
|
||||
description = self._html_search_regex(
|
||||
r'<p\s+class="teasertext">(.+?)</p>',
|
||||
webpage, 'teaser text', default=None)
|
||||
|
||||
# Thumbnail is sometimes not present.
|
||||
# It is in the mobile version, but that seems to use a different URL
|
||||
@@ -282,3 +290,76 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'title': 'Tatort: Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
|
||||
'upload_date': '20180826',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
|
||||
res = {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
formats = []
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
||||
|
||||
if '_duration' in widget:
|
||||
res['duration'] = widget['_duration']
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
if '_subtitleUrl' in widget:
|
||||
res['subtitles'] = {'de': [{
|
||||
'ext': 'ttml',
|
||||
'url': widget['_subtitleUrl'],
|
||||
}]}
|
||||
if '_quality' in widget:
|
||||
format_url = widget['_stream']['json'][0]
|
||||
|
||||
if format_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif format_url.endswith('m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'http-' + widget['_quality'],
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
res['formats'] = formats
|
||||
|
||||
return res
|
||||
|
@@ -8,7 +8,6 @@ from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
|
||||
data=urlencode_postdata({
|
||||
'postid': video_id,
|
||||
'action': 'get_channel_kaltura_vars',
|
||||
}))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
entry_id = data['entry_id']
|
||||
entry_id, partner_id, title = [None] * 3
|
||||
|
||||
vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False)
|
||||
if vars:
|
||||
entry_id = vars.get('entry_id')
|
||||
partner_id = vars.get('partner_id')
|
||||
title = vars.get('vid_label')
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._search_regex(
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
|
||||
'kaltura id', group='id')
|
||||
|
||||
if not partner_id:
|
||||
partner_id = self._search_regex(
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (data['partner_id'], entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id,
|
||||
video_title=data.get('vid_label'))
|
||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
||||
video_title=title)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
|
@@ -74,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
|
@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||
webpage, 'player data')), display_id)['config']['initial_video']
|
||||
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||
webpage, 'player data', group='json')),
|
||||
display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
@@ -5,13 +5,12 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class AudiMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
|
||||
'md5': '79a8b71c46d49042609795ab59779b66',
|
||||
'info_dict': {
|
||||
@@ -24,41 +23,46 @@ class AudiMediaIE(InfoExtractor):
|
||||
'duration': 74022,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
# extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
|
||||
_AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
|
||||
}, {
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
raw_payload = self._search_regex([
|
||||
r'class="amtv-embed"[^>]+id="([^"]+)"',
|
||||
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
|
||||
r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
|
||||
r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
|
||||
r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
|
||||
r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
|
||||
r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
|
||||
], webpage, 'raw payload')
|
||||
_, stage_mode, video_id, lang = raw_payload.split('-')
|
||||
_, stage_mode, video_id, _ = raw_payload.split('-')
|
||||
|
||||
# TODO: handle s and e stage_mode (live streams and ended live streams)
|
||||
if stage_mode not in ('s', 'e'):
|
||||
request = sanitized_Request(
|
||||
'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
|
||||
headers={'X-Auth-Token': self._AUTH_TOKEN})
|
||||
json_data = self._download_json(request, video_id)['results']
|
||||
video_data = self._download_json(
|
||||
'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
|
||||
video_id, query={
|
||||
'embed[]': ['video_versions', 'thumbnail_image'],
|
||||
})['results']
|
||||
formats = []
|
||||
|
||||
stream_url_hls = json_data.get('stream_url_hls')
|
||||
stream_url_hls = video_data.get('stream_url_hls')
|
||||
if stream_url_hls:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
stream_url_hls, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
stream_url_hds = json_data.get('stream_url_hds')
|
||||
stream_url_hds = video_data.get('stream_url_hds')
|
||||
if stream_url_hds:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
stream_url_hds + '?hdcore=3.4.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
|
||||
for video_version in json_data.get('video_versions'):
|
||||
for video_version in video_data.get('video_versions', []):
|
||||
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
|
||||
if not video_version_url:
|
||||
continue
|
||||
@@ -79,11 +83,11 @@ class AudiMediaIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': json_data['title'],
|
||||
'description': json_data.get('subtitle'),
|
||||
'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
|
||||
'timestamp': parse_iso8601(json_data.get('publication_date')),
|
||||
'duration': int_or_none(json_data.get('duration')),
|
||||
'view_count': int_or_none(json_data.get('view_count')),
|
||||
'title': video_data['title'],
|
||||
'description': video_data.get('subtitle'),
|
||||
'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
|
||||
'timestamp': parse_iso8601(video_data.get('publication_date')),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'view_count': int_or_none(video_data.get('view_count')),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -65,7 +65,7 @@ class AudiomackIE(InfoExtractor):
|
||||
return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
|
||||
|
||||
return {
|
||||
'id': api_response.get('id', album_url_tag),
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
|
@@ -1,213 +1,86 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
|
||||
def _kaltura_video(self, partner_id, entry_id):
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
||||
video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
|
||||
class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[0-9]+-show-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
||||
\#
|
||||
)|
|
||||
\#
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
)
|
||||
(?P<id>[^\#]+)
|
||||
(?:
|
||||
\#video=
|
||||
(?P<kaltura_id>
|
||||
[_0-9a-z]+
|
||||
)
|
||||
)?
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'segment'
|
||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_2444peh4',
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
||||
'uploader_id': 'TeleZ?ri',
|
||||
'upload_date': '20161218',
|
||||
'timestamp': 1482084490,
|
||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# URL with 'segment' and fragment:
|
||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'episode' and fragment:
|
||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'show' and fragment:
|
||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
||||
webpage, 'kaltura partner id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
||||
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'episode'
|
||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'info_dict': {
|
||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
'only_matching': True
|
||||
}]
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
entry_id = mobj.group('kaltura_id')
|
||||
|
||||
entries = []
|
||||
if not entry_id:
|
||||
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
||||
payload = {
|
||||
'query': '''query VideoContext($articleId: ID!) {
|
||||
article: node(id: $articleId) {
|
||||
... on Article {
|
||||
mainAssetRelation {
|
||||
asset {
|
||||
... on VideoAsset {
|
||||
kalturaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||
}
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
data=json.dumps(payload).encode())
|
||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id', default=None)
|
||||
|
||||
if partner_id:
|
||||
entries = [
|
||||
self._kaltura_video(partner_id, m.group('id'))
|
||||
for m in re.finditer(
|
||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
||||
|
||||
|
||||
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien show playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?:
|
||||
all-episodes|
|
||||
alle-episoden
|
||||
)/
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
||||
'info_dict': {
|
||||
'id': 'astrotalk',
|
||||
'title': 'TeleZüri: AstroTalk - alle episoden',
|
||||
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
episodes = get_element_by_class('search-mobile-box', webpage)
|
||||
entries = [self.url_result(
|
||||
urljoin(url, m.group('url'))) for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
@@ -44,7 +44,7 @@ class BambuserIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
|
@@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@@ -16,14 +15,18 @@ from ..utils import (
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
@@ -35,13 +38,44 @@ class BandcampIE(InfoExtractor):
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
|
||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
'track_number': 1,
|
||||
'track_id': '2650410135',
|
||||
'artist': 'Ben Prunty',
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
|
||||
'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
|
||||
'info_dict': {
|
||||
'id': '2584466013',
|
||||
'ext': 'mp3',
|
||||
'title': 'Mastodon - Hail to Fire',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
'track_number': 5,
|
||||
'track_id': '2584466013',
|
||||
'artist': 'Mastodon',
|
||||
'album': 'Call of the Mastodon',
|
||||
},
|
||||
}]
|
||||
|
||||
@@ -50,19 +84,23 @@ class BandcampIE(InfoExtractor):
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
track_id = None
|
||||
track = None
|
||||
track_number = None
|
||||
duration = None
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
formats = []
|
||||
track_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
||||
webpage, 'track info', default='{}'), title)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
for format_id, format_url in file_.items():
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -72,85 +110,110 @@ class BandcampIE(InfoExtractor):
|
||||
'acodec': ext,
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
def extract(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'track id')
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, video_id, 'Downloading free downloads page')
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
|
||||
info = blob['digital_items'][0]
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
lambda x: x['download_items'][0]), dict)
|
||||
if info:
|
||||
downloads = info.get('downloads')
|
||||
if isinstance(downloads, dict):
|
||||
if not track:
|
||||
track = info.get('title')
|
||||
if not artist:
|
||||
artist = info.get('artist')
|
||||
if not thumbnail:
|
||||
thumbnail = info.get('thumb_url')
|
||||
|
||||
downloads = info['downloads']
|
||||
track = info['title']
|
||||
download_formats = {}
|
||||
download_formats_list = blob.get('download_formats')
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
artist = info.get('artist')
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
download_formats = {}
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
formats = []
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, video_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = stat.get('retry_url')
|
||||
if not isinstance(retry_url, compat_str):
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url') or thumbnail,
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -306,7 +369,7 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in show['audio_stream'].items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
for known_ext in KNOWN_EXTENSIONS:
|
||||
if known_ext in format_id:
|
||||
|
@@ -12,6 +12,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
@@ -20,7 +21,6 @@ from ..utils import (
|
||||
urljoin,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_urlparse,
|
||||
)
|
||||
@@ -29,7 +29,7 @@ from ..compat import (
|
||||
class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'[pbw][\da-z]{7}'
|
||||
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
@@ -236,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/programmes/m00005xn',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
||||
@@ -333,14 +339,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
self._raise_extractor_error(last_exception)
|
||||
|
||||
def _download_media_selector_url(self, url, programme_id=None):
|
||||
try:
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML')
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
|
||||
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
|
||||
else:
|
||||
raise
|
||||
media_selection = self._download_xml(
|
||||
url, programme_id, 'Downloading media selection XML',
|
||||
expected_status=(403, 404))
|
||||
return self._process_media_selector(media_selection, programme_id)
|
||||
|
||||
def _process_media_selector(self, media_selection, programme_id):
|
||||
@@ -772,6 +773,28 @@ class BBCIE(BBCCoUkIE):
|
||||
# single video article embedded with data-media-vpid
|
||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||
'info_dict': {
|
||||
'id': 'p06556y7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# window.__PRELOADED_STATE__
|
||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||
'info_dict': {
|
||||
'id': 'b0b9z4vz',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prom 6: An American in Paris and Turangalila',
|
||||
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
@@ -994,6 +1017,66 @@ class BBCIE(BBCCoUkIE):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||
'bbcthree config', default='{}'),
|
||||
playlist_id, transform_source=js_to_json, fatal=False)
|
||||
if bbc3_config:
|
||||
bbc3_playlist = try_get(
|
||||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
||||
dict)
|
||||
if bbc3_playlist:
|
||||
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||
thumbnail = bbc3_playlist.get('holdingImageURL')
|
||||
entries = []
|
||||
for bbc3_item in bbc3_playlist['items']:
|
||||
programme_id = bbc3_item.get('versionID')
|
||||
if not programme_id:
|
||||
continue
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
self._sort_formats(formats)
|
||||
entries.append({
|
||||
'id': programme_id,
|
||||
'title': playlist_title,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
|
@@ -12,7 +12,7 @@ class BellMediaIE(InfoExtractor):
|
||||
(?:
|
||||
ctv|
|
||||
tsn|
|
||||
bnn|
|
||||
bnn(?:bloomberg)?|
|
||||
thecomedynetwork|
|
||||
discovery|
|
||||
discoveryvelocity|
|
||||
@@ -27,17 +27,16 @@ class BellMediaIE(InfoExtractor):
|
||||
much\.com
|
||||
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctv.ca/video/player?vid=706966',
|
||||
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
|
||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
|
||||
'info_dict': {
|
||||
'id': '706966',
|
||||
'ext': 'mp4',
|
||||
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
|
||||
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
|
||||
'upload_date': '20150919',
|
||||
'timestamp': 1442624700,
|
||||
'id': '1403070',
|
||||
'ext': 'flv',
|
||||
'title': 'David Cockfield\'s Top Picks',
|
||||
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||
'upload_date': '20180525',
|
||||
'timestamp': 1527288600,
|
||||
},
|
||||
'expected_warnings': ['HTTP Error 404'],
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
@@ -70,6 +69,7 @@ class BellMediaIE(InfoExtractor):
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
'etalk': 'ctv',
|
||||
'bnnbloomberg': 'bnn',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -114,7 +114,7 @@ class BiliBiliIE(InfoExtractor):
|
||||
|
||||
if 'anime/' not in url:
|
||||
cid = self._search_regex(
|
||||
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
|
||||
default=None
|
||||
) or compat_parse_qs(self._search_regex(
|
||||
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
|
||||
|
120
youtube_dl/extractor/bitchute.py
Normal file
120
youtube_dl/extractor/bitchute.py
Normal file
@@ -0,0 +1,120 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urlencode_postdata
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
    """Extractor for single BitChute videos (video, embed and torrent URLs)."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
        'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
        'info_dict': {
            'id': 'szoMrox2JEI',
            'ext': 'mp4',
            'title': 'Fuck bitches get money',
            'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Victoria X Rave',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the /video/ page for the matched id and scrape its metadata.

        Returns an info dict with id, title, description, thumbnail,
        uploader and the formats found in ``addWebSeed(...)`` calls.
        """
        video_id = self._match_id(url)

        # Regardless of which URL flavour matched, the /video/ page is
        # requested, sending a desktop Chrome User-Agent header.
        page_url = 'https://www.bitchute.com/video/%s' % video_id
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
        }
        webpage = self._download_webpage(
            page_url, video_id, headers=request_headers)

        # Title sources are tried lazily, in this order: page markup,
        # the "description" meta tag, then the OpenGraph description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'description', webpage, 'title', default=None)
        if not title:
            title = self._og_search_description(webpage)

        # Every addWebSeed('<url>') call on the page yields one format.
        formats = []
        for seed in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
            formats.append({'url': seed.group('url')})
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage, default=None)
        if not thumbnail:
            thumbnail = self._html_search_meta(
                'twitter:image:src', webpage, 'thumbnail')

        uploader = self._html_search_regex(
            r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
            'uploader', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class BitChuteChannelIE(InfoExtractor):
    """Playlist extractor that enumerates the videos of a BitChute channel."""

    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Fixed value sent both as the csrfmiddlewaretoken form field and as the
    # csrftoken cookie of every paging request.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield a url_result for every video listed by the channel.

        Pages are requested from the channel's ``extend/`` endpoint with a
        growing offset until a page reports failure, carries no HTML or
        contains no video links.
        """
        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        extend_url = '%sextend/' % channel_url
        offset = 0
        for page_num in itertools.count(1):
            form = urlencode_postdata({
                'csrfmiddlewaretoken': self._TOKEN,
                'name': '',
                'offset': offset,
            })
            # A fresh headers dict is built for every request.
            request_headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': channel_url,
                'X-Requested-With': 'XMLHttpRequest',
                'Cookie': 'csrftoken=%s' % self._TOKEN,
            }
            data = self._download_json(
                extend_url, channel_id,
                'Downloading channel page %d' % page_num,
                data=form, headers=request_headers)

            # Only an explicit False stops paging; a missing key does not.
            if data.get('success') is False:
                break
            html = data.get('html')
            if not html:
                break
            video_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                html)
            if not video_ids:
                break

            # The next page starts after everything seen so far.
            offset += len(video_ids)
            for video_id in video_ids:
                video_url = 'https://www.bitchute.com/video/%s' % video_id
                yield self.url_result(
                    video_url, ie=BitChuteIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        """Return a playlist result whose id is the channel id from the URL."""
        channel_id = self._match_id(url)
        entries = self._entries(channel_id)
        return self.playlist_result(entries, playlist_id=channel_id)
|
@@ -4,8 +4,10 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BreakIE(InfoExtractor):
|
||||
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video in content:
|
||||
video_url = video.get('url')
|
||||
if not video_url or not isinstance(video_url, compat_str):
|
||||
video_url = url_or_none(video.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(self._search_regex(
|
||||
r'(\d+)_kbps', video_url, 'tbr', default=None))
|
||||
|
@@ -1,8 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
@@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _brightcove_new_url_result(self, publisher_id, video_id):
    """Build a url_result that hands *video_id* over to BrightcoveNewIE.

    The target is the publisher's ``default_default`` player page on
    players.brightcove.net.
    """
    return self.url_result(
        'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
        % (publisher_id, video_id),
        BrightcoveNewIE.ie_key(), video_id)
|
||||
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
@@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
if not publisher_id:
|
||||
player_key = query.get('playerKey')
|
||||
if player_key and ',' in player_key[0]:
|
||||
player_key = player_key[0]
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
|
||||
player_page, 'player key', fatal=False)
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
@@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
return ad_info
|
||||
|
||||
if 'url' not in info and not info.get('formats'):
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
if not info.get('url') and not info.get('formats'):
|
||||
uploader_id = info.get('uploader_id')
|
||||
if uploader_id:
|
||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
|
||||
|
||||
@@ -572,7 +604,8 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
container = source.get('container')
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
src = source.get('src')
|
||||
if ext == 'ism' or container == 'WVM':
|
||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
||||
continue
|
||||
elif ext == 'm3u8' or container == 'M2TS':
|
||||
if not src:
|
||||
@@ -629,6 +662,14 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
'format_id': build_format_id('rtmp'),
|
||||
})
|
||||
formats.append(f)
|
||||
if not formats:
|
||||
# for sonyliv.com DRM protected videos
|
||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
||||
if s3_source_url:
|
||||
formats.append({
|
||||
'url': s3_source_url,
|
||||
'format_id': 'source',
|
||||
})
|
||||
|
||||
errors = json_data.get('errors')
|
||||
if not formats and errors:
|
||||
@@ -669,7 +710,10 @@ class BrightcoveNewIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
|
||||
})
|
||||
|
||||
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
|
42
youtube_dl/extractor/businessinsider.py
Normal file
42
youtube_dl/extractor/businessinsider.py
Normal file
@@ -0,0 +1,42 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
|
||||
|
||||
class BusinessInsiderIE(InfoExtractor):
    """Extractor for Business Insider articles embedding a JW Platform video."""

    _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
        'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
        'info_dict': {
            'id': 'hZRllCfw',
            'ext': 'mp4',
            'title': "Here's how much radiation you're exposed to in everyday life",
            'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
            'upload_date': '20170709',
            'timestamp': 1499606400,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
        'only_matching': True,
    }, {
        'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the 8-character JW Platform media id on the page and delegate to JWPlatformIE."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Candidate patterns, tried in this order by _search_regex.
        media_id_patterns = (
            r'data-media-id=["\']([a-zA-Z0-9]{8})',
            r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
            r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
        )
        jwplatform_id = self._search_regex(
            media_id_patterns, webpage, 'jwplatform id')

        target_url = 'jwplatform:%s' % jwplatform_id
        return self.url_result(
            target_url, ie=JWPlatformIE.ie_key(), video_id=video_id)
|
96
youtube_dl/extractor/cammodels.py
Normal file
96
youtube_dl/extractor/cammodels.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CamModelsIE(InfoExtractor):
    """Extractor for live streams on cammodels.com."""

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cammodels.com/cam/AutumnKnight/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the stream manifest for the user page and build live formats.

        Raises ExtractorError when no manifest root is present on the page;
        the error is marked as expected when the page contains one of the
        known status messages.
        """
        user_id = self._match_id(url)

        webpage = self._download_webpage(
            url, user_id, headers=self.geo_verification_headers())

        manifest_root = self._html_search_regex(
            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)

        if not manifest_root:
            ERRORS = (
                ("I'm offline, but let's stay connected", 'This user is currently offline'),
                ('in a private show', 'This user is in a private show'),
                ('is currently performing LIVE', 'This model is currently performing live'),
            )
            # Start from the generic (unexpected) failure and replace it with
            # the first known status message found on the page.
            error, expected = 'Unable to find manifest URL root', False
            for pattern, message in ERRORS:
                if pattern in webpage:
                    error, expected = message, True
                    break
            raise ExtractorError(error, expected=expected)

        manifest_url = '%s%s.json' % (manifest_root, user_id)
        manifest = self._download_json(manifest_url, user_id)

        formats = []
        for format_id, format_dict in manifest['formats'].items():
            if not isinstance(format_dict, dict):
                continue
            encodings = format_dict.get('encodings')
            if not isinstance(encodings, list):
                continue

            # The container-specific fields depend on the format id only, so
            # they are determined once per format group.
            if 'rtmp' in format_id:
                extra_fields = {'ext': 'flv'}
            elif 'hls' in format_id:
                extra_fields = {
                    'ext': 'mp4',
                    # hls skips fragments, preferring rtmp
                    'preference': -1,
                }
            else:
                # Neither rtmp nor hls: none of these encodings is emitted.
                extra_fields = None

            vcodec = format_dict.get('videoCodec')
            acodec = format_dict.get('audioCodec')
            for media in encodings:
                if not isinstance(media, dict):
                    continue
                media_url = url_or_none(media.get('location'))
                if not media_url or extra_fields is None:
                    continue

                height = int_or_none(media.get('videoHeight'))
                id_parts = [format_id]
                if height is not None:
                    id_parts.append('%dp' % height)

                fmt = {
                    'url': media_url,
                    'format_id': '-'.join(id_parts),
                    'width': int_or_none(media.get('videoWidth')),
                    'height': height,
                    'vbr': int_or_none(media.get('videoKbps')),
                    'abr': int_or_none(media.get('audioKbps')),
                    'fps': int_or_none(media.get('fps')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                }
                fmt.update(extra_fields)
                formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': user_id,
            'title': self._live_title(user_id),
            'is_live': True,
            'formats': formats,
        }
|
69
youtube_dl/extractor/camtube.py
Normal file
69
youtube_dl/extractor/camtube.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class CamTubeIE(InfoExtractor):
    """Extractor for recordings hosted on camtube.co."""

    _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
        'info_dict': {
            'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
            'display_id': 'minafay-030618-1136-chaturbate-female',
            'ext': 'mp4',
            'title': 'minafay-030618-1136-chaturbate-female',
            'duration': 1274,
            'timestamp': 1528018608,
            'upload_date': '20180603',
        },
        'params': {
            'skip_download': True,
        },
    }]

    _API_BASE = 'https://api.camtube.co'

    def _real_extract(self, url):
        """Query the camtube API for the recording and return its info dict.

        A session token is requested first and stored as the ``session``
        cookie for api.camtube.co before the recording metadata is fetched.
        """
        display_id = self._match_id(url)

        session = self._download_json(
            '%s/rpc/session/new' % self._API_BASE, display_id,
            'Downloading session token')
        self._set_cookie('api.camtube.co', 'session', session['token'])

        video = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
            headers={'Referer': url})

        # The uuid is the only mandatory field of the metadata.
        video_id = video['uuid']

        manifest_url = '%s/recordings/%s/manifest.m3u8' % (
            self._API_BASE, video_id)

        return {
            'id': video_id,
            'display_id': display_id,
            # The display id from the URL doubles as the title.
            'title': display_id,
            'timestamp': unified_timestamp(video.get('createdAt')),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('viewCount')),
            'like_count': int_or_none(video.get('likeCount')),
            'creator': video.get('stageName'),
            'formats': [{
                'url': manifest_url,
                'format_id': 'hls',
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            }],
        }
|
@@ -11,6 +11,7 @@ from ..utils import (
|
||||
strip_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
info = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
# title is optional here since it may be extracted by extractor
|
||||
# that is delegated from here
|
||||
title = strip_or_none(self._html_search_regex(
|
||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
||||
webpage, 'title').strip()
|
||||
webpage, 'title', default=None))
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
||||
@@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
|
||||
# the first one
|
||||
video_id = list(video.values())[0].get('videoid')
|
||||
|
||||
return {
|
||||
return merge_dicts(info, {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||
'ie_key': CanvasIE.ie_key(),
|
||||
@@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'release_date': release_date,
|
||||
}
|
||||
})
|
||||
|
@@ -17,9 +17,11 @@ from ..utils import (
|
||||
xpath_element,
|
||||
xpath_with_ns,
|
||||
find_xpath_attr,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -129,15 +131,23 @@ class CBCIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
entries = [
|
||||
self._extract_player_init(player_init, display_id)
|
||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||
media_ids = []
|
||||
for media_id_re in (
|
||||
r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
|
||||
r'<div[^>]+\bid=["\']player-(\d+)',
|
||||
r'guid["\']\s*:\s*["\'](\d+)'):
|
||||
media_ids.extend(re.findall(media_id_re, webpage))
|
||||
entries.extend([
|
||||
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
|
||||
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
|
||||
for media_id in orderedSet(media_ids)])
|
||||
return self.playlist_result(
|
||||
entries, display_id,
|
||||
self._og_search_title(webpage, fatal=False),
|
||||
entries, display_id, strip_or_none(title),
|
||||
self._og_search_description(webpage))
|
||||
|
||||
|
||||
|
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
|
||||
media_url = media['media']['url']
|
||||
if isinstance(media_url, list):
|
||||
for format_ in media_url:
|
||||
format_url = format_.get('file')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
|
@@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
|
@@ -31,7 +31,8 @@ class ChaturbateIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers=self.geo_verification_headers())
|
||||
|
||||
m3u8_urls = []
|
||||
|
||||
|
142
youtube_dl/extractor/ciscolive.py
Normal file
142
youtube_dl/extractor/ciscolive.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class CiscoLiveBaseIE(InfoExtractor):
    """Shared RainFocus API plumbing for the Cisco Live extractors."""

    # These appear to be constant across all Cisco Live presentations
    # and are not tied to any user session or event
    RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
    RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
    RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'

    HEADERS = {
        'Origin': 'https://ciscolive.cisco.com',
        'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
        'rfWidgetId': RAINFOCUS_WIDGET_ID,
    }

    def _call_api(self, ep, rf_id, query, referrer, note=None):
        """POST *query* to RainFocus endpoint *ep* and return the decoded JSON.

        The class-level HEADERS are sent together with *referrer* as the
        Referer header; the shared HEADERS dict itself is left untouched.
        """
        request_headers = dict(self.HEADERS, Referer=referrer)
        return self._download_json(
            self.RAINFOCUS_API_URL % ep, rf_id, note=note,
            data=urlencode_postdata(query), headers=request_headers)

    def _parse_rf_item(self, rf_item):
        """Convert one RainFocus session item into a url_transparent result.

        ``title`` and the first video's ``url`` are required (a missing one
        raises); every other field is optional.
        """
        event_name = rf_item.get('eventName')
        title = rf_item['title']
        description = clean_html(rf_item.get('abstract'))
        presenter_name = try_get(
            rf_item, lambda x: x['participants'][0]['fullName'])
        # The "url" of the first video is used as the Brightcove video id.
        bc_url = self.BRIGHTCOVE_URL_TEMPLATE % rf_item['videos'][0]['url']

        duration = float_or_none(
            try_get(rf_item, lambda x: x['times'][0]['length']))
        if duration:
            # The reported length is multiplied by 60 to obtain the duration
            # (presumably minutes -> seconds; not confirmed by the code).
            duration *= 60
        location = try_get(rf_item, lambda x: x['times'][0]['room'])

        return {
            '_type': 'url_transparent',
            'url': bc_url,
            'ie_key': 'BrightcoveNew',
            'title': title,
            'description': description,
            'duration': duration,
            'creator': presenter_name,
            'location': location,
            'series': event_name,
        }
|
||||
|
||||
|
||||
class CiscoLiveSessionIE(CiscoLiveBaseIE):
    """Extractor for a single session of the Cisco Live on-demand library."""

    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
    _TEST = {
        'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
        'md5': 'c98acf395ed9c9f766941c70f5352e22',
        'info_dict': {
            'id': '5803694304001',
            'ext': 'mp4',
            'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
            'description': 'md5:ec4a436019e09a918dec17714803f7cc',
            'timestamp': 1530305395,
            'upload_date': '20180629',
            'uploader_id': '5647924234001',
            'location': '16B Mezz.',
        },
    }

    def _real_extract(self, url):
        """Look the session up through the API and parse its first item."""
        rf_id = self._match_id(url)
        response = self._call_api('session', rf_id, {'id': rf_id}, url)
        first_item = response['items'][0]
        return self._parse_rf_item(first_item)
|
||||
|
||||
|
||||
class CiscoLiveSearchIE(CiscoLiveBaseIE):
    """Playlist extractor for Cisco Live on-demand library search URLs."""

    _VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
    _TESTS = [{
        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
        'info_dict': {
            'title': 'Search query',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match library URLs, except those CiscoLiveSessionIE already handles."""
        if CiscoLiveSessionIE.suitable(url):
            return False
        return super(CiscoLiveSearchIE, cls).suitable(url)

    @staticmethod
    def _check_bc_id_exists(rf_item):
        """Return True when the item's first video "url" parses as an integer."""
        return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None

    def _entries(self, query, url):
        """Yield parsed results for every search hit that carries a video id.

        *query* is updated in place with the paging fields ``size`` and
        ``from``. Paging stops on an empty or malformed page, or as soon as
        the window just fetched reaches the reported total.
        """
        query['size'] = 50
        query['from'] = 0
        for page_num in itertools.count(1):
            results = self._call_api(
                'search', None, query, url,
                'Downloading search JSON page %d' % page_num)
            # When a sectionList is present, its first section is used as
            # the result container instead of the top-level object.
            sl = try_get(results, lambda x: x['sectionList'][0], dict)
            if sl:
                results = sl
            items = results.get('items')
            if not items or not isinstance(items, list):
                break
            for item in items:
                if not isinstance(item, dict):
                    continue
                if not self._check_bc_id_exists(item):
                    continue
                yield self._parse_rf_item(item)
            # Adopt the page size reported by the response, if any.
            size = int_or_none(results.get('size'))
            if size is not None:
                query['size'] = size
            total = int_or_none(results.get('total'))
            # ">=" rather than ">": when the window ends exactly at the
            # total there is nothing left, so no further request is made.
            if total is not None and query['from'] + query['size'] >= total:
                break
            query['from'] += query['size']

    def _real_extract(self, url):
        """Turn the URL's query string into an API search and return a playlist."""
        query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query['type'] = 'session'
        return self.playlist_result(
            self._entries(query, url), playlist_title='Search query')
|
@@ -1,19 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
_translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
|
||||
}
|
||||
|
||||
|
||||
def _decode(s):
|
||||
return ''.join(_translation_table.get(c, c) for c in s)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
@@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, f in gexo_files.items():
|
||||
video_url = f.get('url')
|
||||
video_url = url_or_none(f.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
fmt = f.get('fmt')
|
||||
height = f.get('h')
|
||||
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
|
||||
formats.append({
|
||||
'url': _decode(video_url),
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(f.get('w')),
|
||||
'height': int_or_none(height),
|
||||
|
60
youtube_dl/extractor/cloudflarestream.py
Normal file
60
youtube_dl/extractor/cloudflarestream.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CloudflareStreamIE(InfoExtractor):
    """Extractor for videos served by Cloudflare Stream."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:watch\.)?cloudflarestream\.com/|
                            embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
                        )
                        (?P<id>[\da-f]+)
                    '''
    _TESTS = [{
        'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
        'info_dict': {
            'id': '31c9291ab41fac05471db4e73aa11717',
            'ext': 'mp4',
            'title': '31c9291ab41fac05471db4e73aa11717',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
        'only_matching': True,
    }, {
        'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src URLs of all Cloudflare Stream embed scripts in *webpage*."""
        embed_urls = []
        for script in re.finditer(
                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
                webpage):
            embed_urls.append(script.group('url'))
        return embed_urls

    def _real_extract(self, url):
        """Collect HLS and DASH formats from the video's manifest URLs.

        Both manifests are requested non-fatally; the video id is also used
        as the title.
        """
        video_id = self._match_id(url)
        manifest_base = 'https://cloudflarestream.com/%s/manifest/video.' % video_id

        formats = self._extract_m3u8_formats(
            manifest_base + 'm3u8',
            video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
            fatal=False)
        dash_formats = self._extract_mpd_formats(
            manifest_base + 'mpd',
            video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
        }
|
@@ -1,15 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ClypIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://clyp.it/ojz2wfah',
|
||||
'md5': '1d4961036c41247ecfdcc439c0cddcbb',
|
||||
'info_dict': {
|
||||
@@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
|
||||
'timestamp': 1443515251,
|
||||
'upload_date': '20150929',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
|
||||
'info_dict': {
|
||||
'id': 'b04p1odi',
|
||||
'ext': 'mp3',
|
||||
'title': 'GJ! (Reward Edit)',
|
||||
'description': 'Metal Resistance (THE ONE edition)',
|
||||
'duration': 177.789,
|
||||
'timestamp': 1528241278,
|
||||
'upload_date': '20180605',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
|
||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
token = qs.get('token', [None])[0]
|
||||
|
||||
query = {}
|
||||
if token:
|
||||
query['token'] = token
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id)
|
||||
'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
|
||||
|
||||
formats = []
|
||||
for secure in ('', 'Secure'):
|
||||
@@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
|
||||
title = metadata['Title']
|
||||
description = metadata.get('Description')
|
||||
duration = float_or_none(metadata.get('Duration'))
|
||||
timestamp = parse_iso8601(metadata.get('DateCreated'))
|
||||
timestamp = unified_timestamp(metadata.get('DateCreated'))
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import smuggle_url
|
||||
|
||||
@@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
|
||||
{'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
|
||||
class CNBCVideoIE(InfoExtractor):
    """Extractor for cnbc.com/video/... article pages.

    The page is downloaded only to find the numeric ``content_id``; the
    actual extraction is delegated to CNBCIE through a
    video.cnbc.com gallery URL built from that id.
    """

    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
    _TEST = {
        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
        'info_dict': {
            'id': '7000031301',
            'ext': 'mp4',
            'title': "Trump: I don't necessarily agree with raising rates",
            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
            'timestamp': 1531958400,
            'upload_date': '20180719',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve the page's content id and hand off to CNBCIE.

        Returns the result of ``self.url_result`` for the gallery URL of
        the matched numeric id, tagged with CNBCIE's ie_key.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Only pattern, string and field name are passed positionally,
        # like the other _search_regex call sites. The previous code also
        # passed display_id positionally, which shifted 'video id' out of
        # the name slot into the following parameter (presumably
        # ``default`` -- confirm against _search_regex's signature), so a
        # failed match would not have been reported as an error.
        video_id = self._search_regex(
            r'content_id["\']\s*:\s*["\'](\d+)', webpage, 'video id')
        return self.url_result(
            'http://video.cnbc.com/gallery/?video=%s' % video_id,
            CNBCIE.ie_key())
|
||||
|
@@ -19,6 +19,7 @@ from ..compat import (
|
||||
compat_cookies,
|
||||
compat_etree_fromstring,
|
||||
compat_getpass,
|
||||
compat_integer_types,
|
||||
compat_http_client,
|
||||
compat_os_name,
|
||||
compat_str,
|
||||
@@ -51,6 +52,7 @@ from ..utils import (
|
||||
GeoUtils,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
JSON_LD_RE,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_codecs,
|
||||
@@ -67,6 +69,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@@ -209,6 +212,11 @@ class InfoExtractor(object):
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||
channel: Full name of the channel the video is uploaded on.
|
||||
Note that channel fields may or may not repeat uploader
|
||||
fields. This depends on a particular extractor.
|
||||
channel_id: Id of the channel.
|
||||
channel_url: Full URL to a channel webpage.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{tag: subformats}. "tag" is usually a language code, and
|
||||
@@ -339,15 +347,17 @@ class InfoExtractor(object):
|
||||
_GEO_BYPASS attribute may be set to False in order to disable
|
||||
geo restriction bypass mechanisms for a particular extractor.
|
||||
Though it won't disable explicit geo restriction bypass based on
|
||||
country code provided with geo_bypass_country. (experimental)
|
||||
country code provided with geo_bypass_country.
|
||||
|
||||
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
|
||||
countries for this extractor. One of these countries will be used by
|
||||
geo restriction bypass mechanism right away in order to bypass
|
||||
geo restriction, of course, if the mechanism is not disabled. (experimental)
|
||||
geo restriction, of course, if the mechanism is not disabled.
|
||||
|
||||
NB: both these geo attributes are experimental and may change in future
|
||||
or be completely removed.
|
||||
_GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
|
||||
IP blocks in CIDR notation for this extractor. One of these IP blocks
|
||||
will be used by geo restriction bypass mechanism similarly
|
||||
to _GEO_COUNTRIES.
|
||||
|
||||
Finally, the _WORKING attribute should be set to False for broken IEs
|
||||
in order to warn the users and skip the tests.
|
||||
@@ -358,6 +368,7 @@ class InfoExtractor(object):
|
||||
_x_forwarded_for_ip = None
|
||||
_GEO_BYPASS = True
|
||||
_GEO_COUNTRIES = None
|
||||
_GEO_IP_BLOCKS = None
|
||||
_WORKING = True
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
@@ -392,12 +403,15 @@ class InfoExtractor(object):
|
||||
|
||||
def initialize(self):
|
||||
"""Initializes an instance (authentication, etc)."""
|
||||
self._initialize_geo_bypass(self._GEO_COUNTRIES)
|
||||
self._initialize_geo_bypass({
|
||||
'countries': self._GEO_COUNTRIES,
|
||||
'ip_blocks': self._GEO_IP_BLOCKS,
|
||||
})
|
||||
if not self._ready:
|
||||
self._real_initialize()
|
||||
self._ready = True
|
||||
|
||||
def _initialize_geo_bypass(self, countries):
|
||||
def _initialize_geo_bypass(self, geo_bypass_context):
|
||||
"""
|
||||
Initialize geo restriction bypass mechanism.
|
||||
|
||||
@@ -408,28 +422,82 @@ class InfoExtractor(object):
|
||||
HTTP requests.
|
||||
|
||||
This method will be used for initial geo bypass mechanism initialization
|
||||
during the instance initialization with _GEO_COUNTRIES.
|
||||
during the instance initialization with _GEO_COUNTRIES and
|
||||
_GEO_IP_BLOCKS.
|
||||
|
||||
You may also manually call it from extractor's code if geo countries
|
||||
You may also manually call it from extractor's code if geo bypass
|
||||
information is not available beforehand (e.g. obtained during
|
||||
extraction) or due to some another reason.
|
||||
extraction) or due to some other reason. In this case you should pass
|
||||
this information in geo bypass context passed as first argument. It may
|
||||
contain the following fields:
|
||||
|
||||
countries: List of geo unrestricted countries (similar
|
||||
to _GEO_COUNTRIES)
|
||||
ip_blocks: List of geo unrestricted IP blocks in CIDR notation
|
||||
(similar to _GEO_IP_BLOCKS)
|
||||
|
||||
"""
|
||||
if not self._x_forwarded_for_ip:
|
||||
country_code = self._downloader.params.get('geo_bypass_country', None)
|
||||
# If there is no explicit country for geo bypass specified and
|
||||
# the extractor is known to be geo restricted let's fake IP
|
||||
# as X-Forwarded-For right away.
|
||||
if (not country_code and
|
||||
self._GEO_BYPASS and
|
||||
self._downloader.params.get('geo_bypass', True) and
|
||||
countries):
|
||||
country_code = random.choice(countries)
|
||||
if country_code:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
|
||||
|
||||
# Geo bypass mechanism is explicitly disabled by user
|
||||
if not self._downloader.params.get('geo_bypass', True):
|
||||
return
|
||||
|
||||
if not geo_bypass_context:
|
||||
geo_bypass_context = {}
|
||||
|
||||
# Backward compatibility: previously _initialize_geo_bypass
|
||||
# expected a list of countries, some 3rd party code may still use
|
||||
# it this way
|
||||
if isinstance(geo_bypass_context, (list, tuple)):
|
||||
geo_bypass_context = {
|
||||
'countries': geo_bypass_context,
|
||||
}
|
||||
|
||||
# The whole point of geo bypass mechanism is to fake IP
|
||||
# as X-Forwarded-For HTTP header based on some IP block or
|
||||
# country code.
|
||||
|
||||
# Path 1: bypassing based on IP block in CIDR notation
|
||||
|
||||
# Explicit IP block specified by user, use it right away
|
||||
# regardless of whether extractor is geo bypassable or not
|
||||
ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
|
||||
|
||||
# Otherwise use random IP block from geo bypass context but only
|
||||
# if extractor is known as geo bypassable
|
||||
if not ip_block:
|
||||
ip_blocks = geo_bypass_context.get('ip_blocks')
|
||||
if self._GEO_BYPASS and ip_blocks:
|
||||
ip_block = random.choice(ip_blocks)
|
||||
|
||||
if ip_block:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen(
|
||||
'[debug] Using fake IP %s as X-Forwarded-For.'
|
||||
% self._x_forwarded_for_ip)
|
||||
return
|
||||
|
||||
# Path 2: bypassing based on country code
|
||||
|
||||
# Explicit country code specified by user, use it right away
|
||||
# regardless of whether extractor is geo bypassable or not
|
||||
country = self._downloader.params.get('geo_bypass_country', None)
|
||||
|
||||
# Otherwise use random country code from geo bypass context but
|
||||
# only if extractor is known as geo bypassable
|
||||
if not country:
|
||||
countries = geo_bypass_context.get('countries')
|
||||
if self._GEO_BYPASS and countries:
|
||||
country = random.choice(countries)
|
||||
|
||||
if country:
|
||||
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
|
||||
if self._downloader.params.get('verbose', False):
|
||||
self._downloader.to_screen(
|
||||
'[debug] Using fake IP %s (%s) as X-Forwarded-For.'
|
||||
% (self._x_forwarded_for_ip, country_code.upper()))
|
||||
% (self._x_forwarded_for_ip, country.upper()))
|
||||
|
||||
def extract(self, url):
|
||||
"""Extracts URL information and returns it in list of dicts."""
|
||||
@@ -488,8 +556,26 @@ class InfoExtractor(object):
|
||||
def IE_NAME(self):
|
||||
return compat_str(type(self).__name__[:-2])
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
|
||||
""" Returns the response handle """
|
||||
@staticmethod
def __can_accept_status_code(err, expected_status):
    """Tell whether a failed HTTP response should be treated as accepted.

    err is the HTTPError that was raised; expected_status is None, a
    single integer status code, a list/tuple of codes, or a callable
    that receives the status code. Returns True only when err.code
    matches that specification (a callable must return exactly True).
    """
    assert isinstance(err, compat_urllib_error.HTTPError)

    # Nothing was declared acceptable: every HTTP error stays an error.
    if expected_status is None:
        return False

    # Exactly one acceptable status code.
    if isinstance(expected_status, compat_integer_types):
        return err.code == expected_status

    # A collection of acceptable status codes.
    if isinstance(expected_status, (list, tuple)):
        return err.code in expected_status

    # A predicate; only a literal True counts as acceptance.
    if callable(expected_status):
        return expected_status(err.code) is True

    # Any other kind of expected_status is a programming error.
    assert False
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the response handle.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
if note is None:
|
||||
self.report_download_webpage(video_id)
|
||||
elif note is not False:
|
||||
@@ -518,6 +604,15 @@ class InfoExtractor(object):
|
||||
try:
|
||||
return self._downloader.urlopen(url_or_request)
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
return False
|
||||
if errnote is None:
|
||||
@@ -530,13 +625,17 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return a tuple (page content as string, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
@@ -625,13 +724,52 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
|
||||
""" Returns the data of the page as a string """
|
||||
def _download_webpage(
|
||||
self, url_or_request, video_id, note=None, errnote=None,
|
||||
fatal=True, tries=1, timeout=5, encoding=None, data=None,
|
||||
headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the data of the page as a string.
|
||||
|
||||
Arguments:
|
||||
url_or_request -- plain text URL as a string or
|
||||
a compat_urllib_request.Request object
|
||||
video_id -- Video/playlist/item identifier (string)
|
||||
|
||||
Keyword arguments:
|
||||
note -- note printed before downloading (string)
|
||||
errnote -- note printed in case of an error (string)
|
||||
fatal -- flag denoting whether error should be considered fatal,
|
||||
i.e. whether it should cause ExtractionError to be raised,
|
||||
otherwise a warning will be reported and extraction continued
|
||||
tries -- number of tries
|
||||
timeout -- sleep interval between tries
|
||||
encoding -- encoding for a page content decoding, guessed automatically
|
||||
when not explicitly specified
|
||||
data -- POST data (bytes)
|
||||
headers -- HTTP headers (dict)
|
||||
query -- URL query (dict)
|
||||
expected_status -- allows accepting failed HTTP requests (non-2xx
|
||||
status code) by explicitly specifying a set of accepted status
|
||||
codes. Can be any of the following entities:
|
||||
- an integer type specifying an exact failed status code to
|
||||
accept
|
||||
- a list or a tuple of integer types specifying a list of
|
||||
failed status codes to accept
|
||||
- a callable accepting an actual failed status code and
|
||||
returning True if it should be accepted
|
||||
Note that this argument does not affect success status codes (2xx)
|
||||
which are always accepted.
|
||||
"""
|
||||
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
@@ -647,11 +785,17 @@ class InfoExtractor(object):
|
||||
def _download_xml_handle(
|
||||
self, url_or_request, video_id, note='Downloading XML',
|
||||
errnote='Unable to download XML', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
if res is False:
|
||||
return res
|
||||
xml_string, urlh = res
|
||||
@@ -659,15 +803,21 @@ class InfoExtractor(object):
|
||||
xml_string, video_id, transform_source=transform_source,
|
||||
fatal=fatal), urlh
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
def _download_xml(
|
||||
self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True, encoding=None,
|
||||
data=None, headers={}, query={}, expected_status=None):
|
||||
"""
|
||||
Return the xml as an xml.etree.ElementTree.Element.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_xml_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
|
||||
@@ -685,11 +835,17 @@ class InfoExtractor(object):
|
||||
def _download_json_handle(
|
||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
"""Return a tuple (JSON object, URL handle)"""
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return a tuple (JSON object, URL handle).
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding, data=data, headers=headers, query=query)
|
||||
encoding=encoding, data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
if res is False:
|
||||
return res
|
||||
json_string, urlh = res
|
||||
@@ -700,11 +856,18 @@ class InfoExtractor(object):
|
||||
def _download_json(
|
||||
self, url_or_request, video_id, note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata', transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={},
|
||||
expected_status=None):
|
||||
"""
|
||||
Return the JSON object as a dict.
|
||||
|
||||
See _download_webpage docstring for arguments specification.
|
||||
"""
|
||||
res = self._download_json_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote,
|
||||
transform_source=transform_source, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query)
|
||||
data=data, headers=headers, query=query,
|
||||
expected_status=expected_status)
|
||||
return res if res is False else res[0]
|
||||
|
||||
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
|
||||
@@ -998,8 +1161,7 @@ class InfoExtractor(object):
|
||||
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_ld = self._search_regex(
|
||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_ld:
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
@@ -1057,10 +1219,10 @@ class InfoExtractor(object):
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
@@ -1550,9 +1712,9 @@ class InfoExtractor(object):
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing audio group an audio group, it represents
|
||||
# a complete (with audio and video) format. So, for such cases
|
||||
# we will ignore references to rendition groups and treat them
|
||||
# referencing an audio group it represents a complete
|
||||
# (with audio and video) format. So, for such cases we will
|
||||
# ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
@@ -1708,9 +1870,7 @@ class InfoExtractor(object):
|
||||
'height': height,
|
||||
})
|
||||
formats.extend(m3u8_formats)
|
||||
continue
|
||||
|
||||
if src_ext == 'f4m':
|
||||
elif src_ext == 'f4m':
|
||||
f4m_url = src_url
|
||||
if not f4m_params:
|
||||
f4m_params = {
|
||||
@@ -1720,9 +1880,13 @@ class InfoExtractor(object):
|
||||
f4m_url += '&' if '?' in f4m_url else '?'
|
||||
f4m_url += compat_urllib_parse_urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
continue
|
||||
|
||||
if src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
'url': src_url,
|
||||
@@ -1733,7 +1897,6 @@ class InfoExtractor(object):
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
continue
|
||||
|
||||
return formats
|
||||
|
||||
@@ -1955,7 +2118,21 @@ class InfoExtractor(object):
|
||||
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
|
||||
|
||||
def prepare_template(template_name, identifiers):
|
||||
t = representation_ms_info[template_name]
|
||||
tmpl = representation_ms_info[template_name]
|
||||
# First off, % characters outside $...$ templates
|
||||
# must be escaped by doubling for proper processing
|
||||
# by % operator string formatting used further (see
|
||||
# https://github.com/rg3/youtube-dl/issues/16867).
|
||||
t = ''
|
||||
in_template = False
|
||||
for c in tmpl:
|
||||
t += c
|
||||
if c == '$':
|
||||
in_template = not in_template
|
||||
elif c == '%' and not in_template:
|
||||
t += c
|
||||
# Next, $...$ templates are translated to their
|
||||
# %(...) counterparts to be used with % operator
|
||||
t = t.replace('$RepresentationID$', representation_id)
|
||||
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
|
||||
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
|
||||
@@ -2286,6 +2463,8 @@ class InfoExtractor(object):
|
||||
media_info['subtitles'].setdefault(lang, []).append({
|
||||
'url': absolute_url(src),
|
||||
})
|
||||
for f in media_info['formats']:
|
||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||
if media_info['formats'] or media_info['subtitles']:
|
||||
entries.append(media_info)
|
||||
return entries
|
||||
|
@@ -4,23 +4,21 @@ from __future__ import unicode_literals, division
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_HTTPError,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# geo restricted to CA
|
||||
'url': 'https://www.crackle.com/andromeda/2502343',
|
||||
'info_dict': {
|
||||
@@ -45,7 +43,10 @@ class CrackleIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.sonycrackle.com/andromeda/2502343',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -83,8 +84,8 @@ class CrackleIE(InfoExtractor):
|
||||
for e in media['MediaURLs']:
|
||||
if e.get('UseDRM') is True:
|
||||
continue
|
||||
format_url = e.get('Path')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(e.get('Path'))
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
@@ -121,8 +122,8 @@ class CrackleIE(InfoExtractor):
|
||||
for cc_file in cc_files:
|
||||
if not isinstance(cc_file, dict):
|
||||
continue
|
||||
cc_url = cc_file.get('Path')
|
||||
if not cc_url or not isinstance(cc_url, compat_str):
|
||||
cc_url = url_or_none(cc_file.get('Path'))
|
||||
if not cc_url:
|
||||
continue
|
||||
lang = cc_file.get('Locale') or 'en'
|
||||
subtitles.setdefault(lang, []).append({'url': cc_url})
|
||||
|
@@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree as etree
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from .vrv import VRVIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
@@ -18,6 +20,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@@ -26,7 +30,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
extract_attributes,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
@@ -43,13 +46,13 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
data['req'] = 'RpcApi' + method
|
||||
data = compat_urllib_parse_urlencode(data).encode('utf-8')
|
||||
return self._download_xml(
|
||||
'http://www.crunchyroll.com/xml/',
|
||||
'https://www.crunchyroll.com/xml/',
|
||||
video_id, note, fatal=False, data=data, headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
@@ -139,7 +142,8 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@@ -148,7 +152,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
||||
'upload_date': '20131013',
|
||||
'url': 're:(?!.*&)',
|
||||
@@ -221,7 +225,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '535080',
|
||||
'ext': 'mp4',
|
||||
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||
'uploader': 'Marvelous AQL Inc.',
|
||||
'upload_date': '20091021',
|
||||
@@ -262,6 +266,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
# Just test metadata extraction
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.crunchyroll.com/media-723735',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMAT_IDS = {
|
||||
@@ -392,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'Downloading subtitles for ' + sub_name, data={
|
||||
'subtitle_script_id': sub_id,
|
||||
})
|
||||
if sub_doc is None:
|
||||
if not isinstance(sub_doc, etree.Element):
|
||||
continue
|
||||
sid = sub_doc.get('id')
|
||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||
@@ -434,13 +441,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'vilos\.config\.media\s*=\s*({.+?});',
|
||||
webpage, 'vilos media', default='{}'), video_id)
|
||||
media_metadata = media.get('metadata') or {}
|
||||
|
||||
language = self._search_regex(
|
||||
r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
|
||||
webpage, 'language', default=None, group='lang')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._parse_json(self._html_search_regex(
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id).get('description')
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
@@ -453,92 +469,113 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||
webpage, 'video_uploader', fatal=False)
|
||||
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
video_encode_ids = []
|
||||
formats = []
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata is not None:
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
for stream in media.get('streams', []):
|
||||
audio_lang = stream.get('audio_lang')
|
||||
hardsub_lang = stream.get('hardsub_lang')
|
||||
vrv_formats = self._extract_vrv_formats(
|
||||
stream.get('url'), video_id, stream.get('format'),
|
||||
audio_lang, hardsub_lang)
|
||||
for f in vrv_formats:
|
||||
if not hardsub_lang:
|
||||
f['preference'] = 1
|
||||
language_preference = 0
|
||||
if audio_lang == language:
|
||||
language_preference += 1
|
||||
if hardsub_lang == language:
|
||||
language_preference += 1
|
||||
if language_preference:
|
||||
f['language_preference'] = language_preference
|
||||
formats.extend(vrv_formats)
|
||||
if not formats:
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
if not available_fmts:
|
||||
available_fmts = self._FORMAT_IDS.keys()
|
||||
video_encode_ids = []
|
||||
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if isinstance(streamdata, etree.Element):
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if isinstance(stream_info, etree.Element):
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
|
||||
|
||||
metadata = self._call_rpc_api(
|
||||
'VideoPlayer_GetMediaMetadata', video_id,
|
||||
@@ -546,16 +583,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'media_id': video_id,
|
||||
})
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
subtitles = {}
|
||||
for subtitle in media.get('subtitles', []):
|
||||
subtitle_url = subtitle.get('url')
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': subtitle.get('format', 'ass'),
|
||||
})
|
||||
if not subtitles:
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
# webpage provide more accurate data than series_title from XML
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||
webpage, 'series', fatal=False)
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
|
||||
if isinstance(metadata, etree.Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||
|
||||
if not episode:
|
||||
episode = media_metadata.get('title')
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||
if not thumbnail:
|
||||
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@@ -565,7 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': series,
|
||||
@@ -580,7 +640,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
|
||||
IE_NAME = 'crunchyroll:playlist'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
|
||||
|
@@ -11,10 +11,10 @@ class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
|
||||
'md5': '9b8624ba66351a23e0b6e1391971f9af',
|
||||
'info_dict': {
|
||||
'id': '901995',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Extended: \'That person cannot be me\' Johnson says',
|
||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||
'timestamp': 1467286284,
|
||||
|
@@ -35,7 +35,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||
return result['data']
|
||||
|
||||
def _real_initialize(self):
|
||||
(email, password) = self._get_login_info()
|
||||
email, password = self._get_login_info()
|
||||
if email is None:
|
||||
return
|
||||
result = self._download_json(
|
||||
|
@@ -3,8 +3,12 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -40,10 +44,15 @@ class CWTVIE(InfoExtractor):
|
||||
'duration': 1263,
|
||||
'series': 'Whose Line Is It Anyway?',
|
||||
'season_number': 11,
|
||||
'season': '11',
|
||||
'episode_number': 20,
|
||||
'upload_date': '20151006',
|
||||
'timestamp': 1444107300,
|
||||
'age_limit': 14,
|
||||
'uploader': 'CWTV',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||
@@ -58,60 +67,31 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = None
|
||||
formats = []
|
||||
for partner in (154, 213):
|
||||
vdata = self._download_json(
|
||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||
if not vdata:
|
||||
continue
|
||||
video_data = vdata
|
||||
for quality, quality_data in vdata.get('videos', {}).items():
|
||||
quality_url = quality_data.get('uri')
|
||||
if not quality_url:
|
||||
continue
|
||||
if quality == 'variantplaylist':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
tbr = int_or_none(quality_data.get('bitrate'))
|
||||
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||
if self._is_valid_url(quality_url, video_id, format_id):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': quality_url,
|
||||
'tbr': tbr,
|
||||
})
|
||||
video_metadata = video_data['assetFields']
|
||||
ism_url = video_metadata.get('smoothStreamingUrl')
|
||||
if ism_url:
|
||||
formats.extend(self._extract_ism_formats(
|
||||
ism_url, video_id, ism_id='mss', fatal=False))
|
||||
self._sort_formats(formats)
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||
|
||||
thumbnails = [{
|
||||
'url': image['uri'],
|
||||
'width': image.get('width'),
|
||||
'height': image.get('height'),
|
||||
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
|
||||
|
||||
subtitles = {
|
||||
'en': [{
|
||||
'url': video_metadata['UnicornCcUrl'],
|
||||
}],
|
||||
} if video_metadata.get('UnicornCcUrl') else None
|
||||
season = str_or_none(video_data.get('season'))
|
||||
episode = str_or_none(video_data.get('episode'))
|
||||
if episode and season:
|
||||
episode = episode.lstrip(season)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'title': video_metadata['title'],
|
||||
'description': video_metadata.get('description'),
|
||||
'duration': int_or_none(video_metadata.get('duration')),
|
||||
'series': video_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(video_metadata.get('seasonNumber')),
|
||||
'season': video_metadata.get('seasonName'),
|
||||
'episode_number': int_or_none(video_metadata.get('episodeNumber')),
|
||||
'timestamp': parse_iso8601(video_data.get('startTime')),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'title': title,
|
||||
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
|
||||
'description': video_data.get('description_long'),
|
||||
'duration': int_or_none(video_data.get('duration_secs')),
|
||||
'series': video_data.get('series_name'),
|
||||
'season_number': int_or_none(season),
|
||||
'episode_number': int_or_none(episode),
|
||||
'timestamp': parse_iso8601(video_data.get('start_time')),
|
||||
'age_limit': parse_age_limit(video_data.get('rating')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
||||
|
@@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
|
||||
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||
|
||||
video_sources = self._download_json(sources_url, video_id)
|
||||
body = video_sources.get('body')
|
||||
if body:
|
||||
video_sources = body
|
||||
|
||||
formats = []
|
||||
for rendition in video_sources['renditions']:
|
||||
|
@@ -1,22 +1,32 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_struct_pack
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
error_to_compat_str,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
OnDemandPagedList,
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
mimetype2ext,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@@ -64,7 +74,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader': 'Deadline',
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
@@ -141,7 +150,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
||||
description = self._og_search_description(webpage) or self._html_search_meta(
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
|
||||
view_count_str = self._search_regex(
|
||||
@@ -164,8 +174,34 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
player = self._parse_json(player_v5, video_id, fatal=False) or {}
|
||||
metadata = try_get(player, lambda x: x['metadata'], dict)
|
||||
if not metadata:
|
||||
metadata_url = url_or_none(try_get(
|
||||
player, lambda x: x['context']['metadata_template_url1']))
|
||||
if metadata_url:
|
||||
metadata_url = metadata_url.replace(':videoId', video_id)
|
||||
else:
|
||||
metadata_url = update_url_query(
|
||||
'https://www.dailymotion.com/player/metadata/video/%s'
|
||||
% video_id, {
|
||||
'embedder': url,
|
||||
'integration': 'inline',
|
||||
'GK_PV5_NEON': '1',
|
||||
})
|
||||
metadata = self._download_json(
|
||||
metadata_url, video_id, 'Downloading metadata JSON')
|
||||
|
||||
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password:
|
||||
r = int(metadata['id'][1:], 36)
|
||||
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
|
||||
t = ''.join(random.choice(string.ascii_letters) for i in range(10))
|
||||
n = us64e(compat_struct_pack('I', r))
|
||||
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
|
||||
metadata = self._download_json(
|
||||
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
|
||||
|
||||
self._check_error(metadata)
|
||||
|
||||
@@ -180,9 +216,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
continue
|
||||
ext = mimetype2ext(type_) or determine_ext(media_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
media_url, video_id, 'mp4', preference=-1,
|
||||
m3u8_id='hls', fatal=False))
|
||||
m3u8_id='hls', fatal=False)
|
||||
for f in m3u8_formats:
|
||||
f['url'] = f['url'].split('#')[0]
|
||||
formats.append(f)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
|
||||
@@ -299,8 +338,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
def _check_error(self, info):
|
||||
error = info.get('error')
|
||||
if info.get('error') is not None:
|
||||
title = error['title']
|
||||
if error:
|
||||
title = error.get('title') or error['message']
|
||||
# See https://developer.dailymotion.com/api#access-error
|
||||
if error.get('code') == 'DM007':
|
||||
self.raise_geo_restricted(msg=title)
|
||||
@@ -325,17 +364,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:playlist'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||
'info_dict': {
|
||||
'title': 'SPORT',
|
||||
'id': 'xv4bw_nqtv_sport',
|
||||
'id': 'xv4bw',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _fetch_page(self, playlist_id, authorizaion, page):
|
||||
page += 1
|
||||
videos = self._download_json(
|
||||
'https://graphql.api.dailymotion.com',
|
||||
playlist_id, 'Downloading page %d' % page,
|
||||
data=json.dumps({
|
||||
'query': '''{
|
||||
collection(xid: "%s") {
|
||||
videos(first: %d, page: %d) {
|
||||
pageInfo {
|
||||
hasNextPage
|
||||
nextPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
xid
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''' % (playlist_id, self._PAGE_SIZE, page)
|
||||
}).encode(), headers={
|
||||
'Authorization': authorizaion,
|
||||
'Origin': 'https://www.dailymotion.com',
|
||||
})['data']['collection']['videos']
|
||||
for edge in videos['edges']:
|
||||
node = edge['node']
|
||||
yield self.url_result(
|
||||
node['url'], DailymotionIE.ie_key(), node['xid'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
api = self._parse_json(self._search_regex(
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
|
||||
webpage, 'player config'), playlist_id)['context']['api']
|
||||
auth = self._download_json(
|
||||
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
|
||||
playlist_id, data=urlencode_postdata({
|
||||
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
|
||||
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
|
||||
'grant_type': 'client_credentials',
|
||||
}))
|
||||
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id,
|
||||
self._og_search_title(webpage))
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _extract_entries(self, id):
|
||||
video_ids = set()
|
||||
@@ -361,43 +476,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||
break
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
playlist_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': playlist_id,
|
||||
'title': self._og_search_title(webpage),
|
||||
'entries': self._extract_entries(playlist_id),
|
||||
}
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
'info_dict': {
|
||||
'id': 'nqtv',
|
||||
'title': 'Rémi Gaillard',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||
'info_dict': {
|
||||
'id': 'UnderProject',
|
||||
'title': 'UnderProject',
|
||||
},
|
||||
'playlist_mincount': 1800,
|
||||
'expected_warnings': [
|
||||
'Stopped at duplicated page',
|
||||
],
|
||||
'skip': 'Takes too long time',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
user = mobj.group('user')
|
||||
|
@@ -5,13 +5,16 @@ from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DctpTvIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# 4x3
|
||||
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
|
||||
'info_dict': {
|
||||
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
|
||||
@@ -19,37 +22,55 @@ class DctpTvIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': 'Videoinstallation für eine Kaufhausfassade',
|
||||
'description': 'Kurzfilm',
|
||||
'upload_date': '20110407',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 71.24,
|
||||
'timestamp': 1302172322,
|
||||
'upload_date': '20110407',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# 16x9
|
||||
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
version = self._download_json(
|
||||
'%s/version.json' % self._BASE_URL, display_id,
|
||||
'Downloading version JSON')
|
||||
|
||||
video_id = self._html_search_meta(
|
||||
'DC.identifier', webpage, 'video id',
|
||||
default=None) or self._search_regex(
|
||||
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
|
||||
restapi_base = '%s/%s/restapi' % (
|
||||
self._BASE_URL, version['version_name'])
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
info = self._download_json(
|
||||
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
|
||||
'Downloading video info JSON')
|
||||
|
||||
media = self._download_json(
|
||||
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
|
||||
display_id, 'Downloading media JSON')
|
||||
|
||||
uuid = media['uuid']
|
||||
title = media['title']
|
||||
ratio = '16x9' if media.get('is_wide') else '4x3'
|
||||
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
|
||||
|
||||
servers = self._download_json(
|
||||
'http://www.dctp.tv/streaming_servers/', display_id,
|
||||
note='Downloading server list', fatal=False)
|
||||
note='Downloading server list JSON', fatal=False)
|
||||
|
||||
if servers:
|
||||
endpoint = next(
|
||||
server['endpoint']
|
||||
for server in servers
|
||||
if isinstance(server.get('endpoint'), compat_str) and
|
||||
if url_or_none(server.get('endpoint')) and
|
||||
'cloudfront' in server['endpoint'])
|
||||
else:
|
||||
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
|
||||
@@ -60,27 +81,35 @@ class DctpTvIE(InfoExtractor):
|
||||
formats = [{
|
||||
'url': endpoint,
|
||||
'app': app,
|
||||
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
|
||||
'play_path': play_path,
|
||||
'page_url': url,
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
|
||||
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
|
||||
'ext': 'flv',
|
||||
}]
|
||||
|
||||
description = self._html_search_meta('DC.description', webpage)
|
||||
upload_date = unified_strdate(
|
||||
self._html_search_meta('DC.date.created', webpage))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = float_or_none(self._search_regex(
|
||||
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
|
||||
default=None), scale=1000)
|
||||
thumbnails = []
|
||||
images = media.get('images')
|
||||
if isinstance(images, list):
|
||||
for image in images:
|
||||
if not isinstance(image, dict):
|
||||
continue
|
||||
image_url = url_or_none(image.get('url'))
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'id': uuid,
|
||||
'display_id': display_id,
|
||||
'description': description,
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'title': title,
|
||||
'alt_title': media.get('subtitle'),
|
||||
'description': media.get('description') or media.get('teaser'),
|
||||
'timestamp': unified_timestamp(media.get('created')),
|
||||
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@@ -5,7 +5,10 @@ import re
|
||||
import string
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import compat_str
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
try_get,
|
||||
@@ -55,15 +58,27 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
|
||||
video_id = video['id']
|
||||
|
||||
access_token = self._download_json(
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
access_token = None
|
||||
cookies = self._get_cookies(url)
|
||||
|
||||
# prefer Affiliate Auth Token over Anonymous Auth Token
|
||||
auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
|
||||
if auth_storage_cookie and auth_storage_cookie.value:
|
||||
auth_storage = self._parse_json(compat_urllib_parse_unquote(
|
||||
compat_urllib_parse_unquote(auth_storage_cookie.value)),
|
||||
video_id, fatal=False) or {}
|
||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
||||
|
||||
if not access_token:
|
||||
access_token = self._download_json(
|
||||
'https://www.%s.com/anonymous' % site, display_id, query={
|
||||
'authRel': 'authorization',
|
||||
'client_id': try_get(
|
||||
react_data, lambda x: x['application']['apiClientId'],
|
||||
compat_str) or '3020a40c2356a645b4b4',
|
||||
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
|
||||
'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
|
||||
})['access_token']
|
||||
|
||||
try:
|
||||
stream = self._download_json(
|
||||
@@ -72,7 +87,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
'Authorization': 'Bearer ' + access_token,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
e.cause.read().decode(), display_id)['description']
|
||||
if 'resource not available for country' in e_description:
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
@@ -12,6 +11,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
|
||||
captions = stream.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
subtitle_url = caption.get('fileUrl')
|
||||
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||
not subtitle_url.startswith('http')):
|
||||
subtitle_url = url_or_none(caption.get('fileUrl'))
|
||||
if not subtitle_url or not subtitle_url.startswith('http'):
|
||||
continue
|
||||
lang = caption.get('fileLang', 'en')
|
||||
ext = determine_ext(subtitle_url)
|
||||
|
@@ -3,8 +3,8 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveLegacyIE
|
||||
from .dplay import DPlayIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
@@ -12,8 +12,13 @@ from ..compat import (
|
||||
from ..utils import smuggle_url
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
|
||||
class DiscoveryNetworksDeIE(DPlayIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
|
||||
(?:
|
||||
.*\#(?P<id>\d+)|
|
||||
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
|
||||
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
|
||||
)'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
|
||||
@@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
alternate_id = mobj.group('alternate_id')
|
||||
if alternate_id:
|
||||
self._initialize_geo_bypass({
|
||||
'countries': ['DE'],
|
||||
})
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (mobj.group('programme'), alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
|
||||
brightcove_id = mobj.group('id')
|
||||
if not brightcove_id:
|
||||
title = mobj.group('title')
|
||||
|
@@ -21,6 +21,7 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
urljoin,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
@@ -97,12 +98,83 @@ class DPlayIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_disco_api_info(self, url, display_id, disco_host, realm):
    """Build an info dict for a video served through a disco API host.

    Requests a token for ``realm`` from ``https://<disco_host>/token``,
    then uses it as a Bearer token (with ``url`` as the Referer) to fetch
    the video metadata and its playback info.

    Returns a dict with id, display_id, title, description, duration,
    timestamp, series, season/episode numbers, age limit and formats.
    """
    api_root = 'https://' + disco_host

    token_response = self._download_json(
        '%s/token' % api_root, display_id, 'Downloading token',
        query={
            'realm': realm,
        })
    token = token_response['data']['attributes']['token']

    # The same headers are sent with both the metadata and playback requests.
    request_headers = {
        'Referer': url,
        'Authorization': 'Bearer ' + token,
    }

    video = self._download_json(
        '%s/content/videos/%s' % (api_root, display_id), display_id,
        headers=request_headers, query={
            'include': 'show'
        })
    video_data = video['data']
    video_id = video_data['id']
    attributes = video_data['attributes']
    title = attributes['name']

    playback = self._download_json(
        '%s/playback/videoPlaybackInfo/%s' % (api_root, video_id),
        display_id, headers=request_headers)
    streaming = playback['data']['attributes']['streaming']

    formats = []
    for stream_kind, stream in streaming.items():
        # Entries that are not dicts or carry no URL are ignored.
        stream_url = stream.get('url') if isinstance(stream, dict) else None
        if not stream_url:
            continue
        ext = determine_ext(stream_url)
        if stream_kind == 'dash' or ext == 'mpd':
            dash_formats = self._extract_mpd_formats(
                stream_url, display_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
        elif stream_kind == 'hls' or ext == 'm3u8':
            hls_formats = self._extract_m3u8_formats(
                stream_url, display_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls',
                fatal=False)
            formats.extend(hls_formats)
        else:
            formats.append({
                'url': stream_url,
                'format_id': stream_kind,
            })
    self._sort_formats(formats)

    # The series name comes from the first included entry of type "show".
    series = None
    included = video.get('included')
    if isinstance(included, list):
        for entry in included:
            if entry.get('type') == 'show':
                series = try_get(
                    entry, lambda x: x['attributes']['name'], compat_str)
                break

    return {
        'id': video_id,
        'display_id': display_id,
        'title': title,
        'description': attributes.get('description'),
        'duration': float_or_none(
            attributes.get('videoDuration'), scale=1000),
        'timestamp': unified_timestamp(attributes.get('publishStart')),
        'series': series,
        'season_number': int_or_none(attributes.get('seasonNumber')),
        'episode_number': int_or_none(attributes.get('episodeNumber')),
        'age_limit': int_or_none(attributes.get('minimum_age')),
        'formats': formats,
    }
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('id')
|
||||
domain = mobj.group('domain')
|
||||
|
||||
self._initialize_geo_bypass([mobj.group('country').upper()])
|
||||
self._initialize_geo_bypass({
|
||||
'countries': [mobj.group('country').upper()],
|
||||
})
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
@@ -111,72 +183,8 @@ class DPlayIE(InfoExtractor):
|
||||
|
||||
if not video_id:
|
||||
host = mobj.group('host')
|
||||
disco_base = 'https://disco-api.%s' % host
|
||||
self._download_json(
|
||||
'%s/token' % disco_base, display_id, 'Downloading token',
|
||||
query={
|
||||
'realm': host.replace('.', ''),
|
||||
})
|
||||
video = self._download_json(
|
||||
'%s/content/videos/%s' % (disco_base, display_id), display_id,
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
|
||||
}, query={
|
||||
'include': 'show'
|
||||
})
|
||||
video_id = video['data']['id']
|
||||
info = video['data']['attributes']
|
||||
title = info['name']
|
||||
formats = []
|
||||
for format_id, format_dict in self._download_json(
|
||||
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
|
||||
display_id)['data']['attributes']['streaming'].items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if format_id == 'dash' or ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, display_id, mpd_id='dash', fatal=False))
|
||||
elif format_id == 'hls' or ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format_id': format_id,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = None
|
||||
try:
|
||||
included = video.get('included')
|
||||
if isinstance(included, list):
|
||||
show = next(e for e in included if e.get('type') == 'show')
|
||||
series = try_get(
|
||||
show, lambda x: x['attributes']['name'], compat_str)
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': info.get('description'),
|
||||
'duration': float_or_none(
|
||||
info.get('videoDuration'), scale=1000),
|
||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||
'series': series,
|
||||
'season_number': int_or_none(info.get('seasonNumber')),
|
||||
'episode_number': int_or_none(info.get('episodeNumber')),
|
||||
'age_limit': int_or_none(info.get('minimum_age')),
|
||||
'formats': formats,
|
||||
}
|
||||
return self._get_disco_api_info(
|
||||
url, display_id, 'disco-api.' + host, host.replace('.', ''))
|
||||
|
||||
info = self._download_json(
|
||||
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
|
||||
@@ -303,9 +311,11 @@ class DPlayItIE(InfoExtractor):
|
||||
|
||||
if not info:
|
||||
info_url = self._search_regex(
|
||||
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
|
||||
webpage, 'info url')
|
||||
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
|
||||
webpage, 'info url', group='url')
|
||||
|
||||
info_url = urljoin(url, info_url)
|
||||
video_id = info_url.rpartition('/')[-1]
|
||||
|
||||
try:
|
||||
@@ -315,6 +325,8 @@ class DPlayItIE(InfoExtractor):
|
||||
'dplayit_token').value,
|
||||
'Referer': url,
|
||||
})
|
||||
if isinstance(info, compat_str):
|
||||
info = self._parse_json(info, display_id)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
|
||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
||||
@@ -330,6 +342,7 @@ class DPlayItIE(InfoExtractor):
|
||||
formats = self._extract_m3u8_formats(
|
||||
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
|
||||
|
@@ -7,7 +7,6 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
@@ -17,6 +16,7 @@ from ..utils import (
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ class DramaFeverBaseIE(InfoExtractor):
|
||||
self._login()
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for sub in subs:
|
||||
if not isinstance(sub, dict):
|
||||
continue
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url or not isinstance(sub_url, compat_str):
|
||||
sub_url = url_or_none(sub.get('url'))
|
||||
if not sub_url:
|
||||
continue
|
||||
subtitles.setdefault(
|
||||
sub.get('code') or sub.get('language') or 'en', []).append({
|
||||
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
|
||||
for format_id, format_dict in download_assets.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
format_url = format_dict.get('url')
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_dict.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
|
@@ -8,7 +8,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
xpath_text,
|
||||
determine_ext,
|
||||
qualities,
|
||||
float_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
@@ -16,7 +15,8 @@ from ..utils import (
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||
@@ -43,7 +43,8 @@ class DreiSatIE(InfoExtractor):
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
param_groups = {}
|
||||
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
|
||||
group_id = param_group.attrib.get(self._xpath_ns('id', 'http://www.w3.org/XML/1998/namespace'))
|
||||
group_id = param_group.get(self._xpath_ns(
|
||||
'id', 'http://www.w3.org/XML/1998/namespace'))
|
||||
params = {}
|
||||
for param in param_group:
|
||||
params[param.get('name')] = param.get('value')
|
||||
@@ -54,7 +55,7 @@ class DreiSatIE(InfoExtractor):
|
||||
src = video.get('src')
|
||||
if not src:
|
||||
continue
|
||||
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
||||
group_id = video.get('paramGroup')
|
||||
param_group = param_groups[group_id]
|
||||
for proto in param_group['protocols'].split(','):
|
||||
@@ -75,66 +76,36 @@ class DreiSatIE(InfoExtractor):
|
||||
note='Downloading video info',
|
||||
errnote='Failed to download video info')
|
||||
|
||||
status_code = doc.find('./status/statuscode')
|
||||
if status_code is not None and status_code.text != 'ok':
|
||||
code = status_code.text
|
||||
if code == 'notVisibleAnymore':
|
||||
status_code = xpath_text(doc, './status/statuscode')
|
||||
if status_code and status_code != 'ok':
|
||||
if status_code == 'notVisibleAnymore':
|
||||
message = 'Video %s is not available' % video_id
|
||||
else:
|
||||
message = '%s returned error: %s' % (self.IE_NAME, code)
|
||||
message = '%s returned error: %s' % (self.IE_NAME, status_code)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
title = doc.find('.//information/title').text
|
||||
description = xpath_text(doc, './/information/detail', 'description')
|
||||
duration = int_or_none(xpath_text(doc, './/details/lengthSec', 'duration'))
|
||||
uploader = xpath_text(doc, './/details/originChannelTitle', 'uploader')
|
||||
uploader_id = xpath_text(doc, './/details/originChannelId', 'uploader id')
|
||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime', 'upload date'))
|
||||
title = xpath_text(doc, './/information/title', 'title', True)
|
||||
|
||||
def xml_to_thumbnails(fnode):
|
||||
thumbnails = []
|
||||
for node in fnode:
|
||||
thumbnail_url = node.text
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
if 'key' in node.attrib:
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key'])
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
return thumbnails
|
||||
|
||||
thumbnails = xml_to_thumbnails(doc.findall('.//teaserimages/teaserimage'))
|
||||
|
||||
format_nodes = doc.findall('.//formitaeten/formitaet')
|
||||
quality = qualities(['veryhigh', 'high', 'med', 'low'])
|
||||
|
||||
def get_quality(elem):
|
||||
return quality(xpath_text(elem, 'quality'))
|
||||
format_nodes.sort(key=get_quality)
|
||||
format_ids = []
|
||||
urls = []
|
||||
formats = []
|
||||
for fnode in format_nodes:
|
||||
video_url = fnode.find('url').text
|
||||
is_available = 'http://www.metafilegenerator' not in video_url
|
||||
if not is_available:
|
||||
for fnode in doc.findall('.//formitaeten/formitaet'):
|
||||
video_url = xpath_text(fnode, 'url')
|
||||
if not video_url or video_url in urls:
|
||||
continue
|
||||
urls.append(video_url)
|
||||
|
||||
is_available = 'http://www.metafilegenerator' not in video_url
|
||||
geoloced = 'static_geoloced_online' in video_url
|
||||
if not is_available or geoloced:
|
||||
continue
|
||||
|
||||
format_id = fnode.attrib['basetype']
|
||||
quality = xpath_text(fnode, './quality', 'quality')
|
||||
format_m = re.match(r'''(?x)
|
||||
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
||||
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
||||
''', format_id)
|
||||
|
||||
ext = determine_ext(video_url, None) or format_m.group('container')
|
||||
if ext not in ('smil', 'f4m', 'm3u8'):
|
||||
format_id = format_id + '-' + quality
|
||||
if format_id in format_ids:
|
||||
continue
|
||||
|
||||
if ext == 'meta':
|
||||
continue
|
||||
@@ -147,24 +118,23 @@ class DreiSatIE(InfoExtractor):
|
||||
if video_url.startswith('https://'):
|
||||
continue
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
proto = format_m.group('proto').lower()
|
||||
quality = xpath_text(fnode, './quality')
|
||||
if quality:
|
||||
format_id += '-' + quality
|
||||
|
||||
abr = int_or_none(xpath_text(fnode, './audioBitrate', 'abr'), 1000)
|
||||
vbr = int_or_none(xpath_text(fnode, './videoBitrate', 'vbr'), 1000)
|
||||
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
|
||||
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
|
||||
|
||||
width = int_or_none(xpath_text(fnode, './width', 'width'))
|
||||
height = int_or_none(xpath_text(fnode, './height', 'height'))
|
||||
|
||||
filesize = int_or_none(xpath_text(fnode, './filesize', 'filesize'))
|
||||
|
||||
format_note = ''
|
||||
if not format_note:
|
||||
format_note = None
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d+)k', video_url, 'bitrate', None))
|
||||
if tbr and vbr and not abr:
|
||||
abr = tbr - vbr
|
||||
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@@ -174,31 +144,50 @@ class DreiSatIE(InfoExtractor):
|
||||
'vcodec': format_m.group('vcodec'),
|
||||
'abr': abr,
|
||||
'vbr': vbr,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'filesize': filesize,
|
||||
'format_note': format_note,
|
||||
'protocol': proto,
|
||||
'_available': is_available,
|
||||
'tbr': tbr,
|
||||
'width': int_or_none(xpath_text(fnode, './width')),
|
||||
'height': int_or_none(xpath_text(fnode, './height')),
|
||||
'filesize': int_or_none(xpath_text(fnode, './filesize')),
|
||||
'protocol': format_m.group('proto').lower(),
|
||||
})
|
||||
format_ids.append(format_id)
|
||||
|
||||
geolocation = xpath_text(doc, './/details/geolocation')
|
||||
if not formats and geolocation and geolocation != 'none':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for node in doc.findall('.//teaserimages/teaserimage'):
|
||||
thumbnail_url = node.text
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
thumbnail_key = node.get('key')
|
||||
if thumbnail_key:
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'description': xpath_text(doc, './/information/detail'),
|
||||
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
|
||||
'thumbnails': thumbnails,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'uploader': xpath_text(doc, './/details/originChannelTitle'),
|
||||
'uploader_id': xpath_text(doc, './/details/originChannelId'),
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
video_id = self._match_id(url)
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
|
||||
return self.extract_from_xml_url(video_id, details_url)
|
||||
|
83
youtube_dl/extractor/dtube.py
Normal file
83
youtube_dl/extractor/dtube.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
from socket import timeout
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class DTubeIE(InfoExtractor):
    """Extractor for d.tube video pages."""

    _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
    _TEST = {
        'url': 'https://d.tube/#!/v/benswann/zqd630em',
        'md5': 'a03eaa186618ffa7a3145945543a251e',
        'info_dict': {
            'id': 'zqd630em',
            'ext': 'mp4',
            'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
            'description': 'md5:700d164e066b87f9eac057949e4227c2',
            'uploader_id': 'benswann',
            'upload_date': '20180222',
            'timestamp': 1519328958,
        },
        'params': {
            'format': '480p',
        },
    }

    def _real_extract(self, url):
        """Fetch the post via the steemit JSON-RPC API and build the info dict."""
        mobj = re.match(self._VALID_URL, url)
        uploader_id, video_id = mobj.group('uploader_id', 'id')

        rpc_request = json.dumps({
            'jsonrpc': '2.0',
            'method': 'get_content',
            'params': [uploader_id, video_id],
        }).encode()
        response = self._download_json(
            'https://api.steemit.com/', video_id, data=rpc_request)
        result = response['result']

        # The video description is a JSON document embedded as a string.
        metadata = json.loads(result['json_metadata'])
        video = metadata['video']
        content = video['content']
        info = video.get('info', {})
        title = info.get('title') or result['title']

        def ipfs_url(ipfs_hash):
            # Map a hash to its ipfs.io URL; missing hashes yield None.
            return 'https://ipfs.io/ipfs/' + ipfs_hash if ipfs_hash else None

        formats = []
        for height in ('240', '480', '720', '1080', ''):
            video_url = ipfs_url(content.get('video%shash' % height))
            if not video_url:
                continue
            # The empty height denotes the original upload.
            format_id = height + 'p' if height else 'Source'
            self.to_screen(
                '%s: Checking %s video format URL' % (video_id, format_id))
            try:
                # Probe the URL; a format whose probe times out is dropped.
                self._downloader._opener.open(video_url, timeout=5).close()
            except timeout:
                self.to_screen(
                    '%s: %s URL is invalid, skipping' % (video_id, format_id))
                continue
            formats.append({
                'format_id': format_id,
                'url': video_url,
                'height': int_or_none(height),
                'ext': 'mp4',
            })

        return {
            'id': video_id,
            'title': title,
            'description': content.get('description'),
            'thumbnail': ipfs_url(info.get('snaphash')),
            'tags': content.get('tags') or metadata.get('tags'),
            'duration': info.get('duration'),
            'formats': formats,
            'timestamp': parse_iso8601(result.get('created')),
            'uploader_id': uploader_id,
        }
|
@@ -91,17 +91,6 @@ class DVTVIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/',
|
||||
'md5': '87defe16681b1429c91f7a74809823c6',
|
||||
'info_dict': {
|
||||
'id': 'f5ae72f6fad611e794dbac1f6b220ee8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _parse_video_metadata(self, js, video_id, live_js=None):
|
||||
|
@@ -4,14 +4,12 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
video_id, 'Downloading mp4 JSON', fatal=False)
|
||||
if mp4_data:
|
||||
for format_id, format_url in mp4_data.get('data', {}).items():
|
||||
if not isinstance(format_url, compat_str):
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
height = int_or_none(format_id)
|
||||
if height is not None and m3u8_formats_dict.get(height):
|
||||
|
@@ -8,6 +8,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for lesson in lessons:
|
||||
lesson_url = lesson.get('http_url')
|
||||
if not lesson_url or not isinstance(lesson_url, compat_str):
|
||||
lesson_url = url_or_none(lesson.get('http_url'))
|
||||
if not lesson_url:
|
||||
continue
|
||||
lesson_id = lesson.get('id')
|
||||
if lesson_id:
|
||||
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for _, format_url in lesson['media_urls'].items():
|
||||
if not format_url or not isinstance(format_url, compat_str):
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
|
@@ -9,8 +9,10 @@ from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,10 +26,16 @@ class EpornerIE(InfoExtractor):
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
'description': 'md5:764f39abf932daafa37485eb46efa152',
|
||||
'timestamp': 1232520922,
|
||||
'upload_date': '20090121',
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118'
|
||||
}
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
@@ -82,8 +90,8 @@ class EpornerIE(InfoExtractor):
|
||||
for format_id, format_dict in formats_dict.items():
|
||||
if not isinstance(format_dict, dict):
|
||||
continue
|
||||
src = format_dict.get('src')
|
||||
if not isinstance(src, compat_str) or not src.startswith('http'):
|
||||
src = url_or_none(format_dict.get('src'))
|
||||
if not src or not src.startswith('http'):
|
||||
continue
|
||||
if kind == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -103,12 +111,15 @@ class EpornerIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
json_ld = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
return merge_dicts(json_ld, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
@@ -116,4 +127,4 @@ class EpornerIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
98
youtube_dl/extractor/expressen.py
Normal file
98
youtube_dl/extractor/expressen.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class ExpressenIE(InfoExtractor):
    """Extractor for expressen.se TV pages and their embeddable players."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?expressen\.se/
                        (?:(?:tvspelare/video|videoplayer/embed)/)?
                        tv/(?:[^/]+/)*
                        (?P<id>[^/?#&]+)
                    '''
    _TESTS = [{
        'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
        'md5': '2fbbe3ca14392a6b1b36941858d33a45',
        'info_dict': {
            'id': '8690962',
            'ext': 'mp4',
            'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
            'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 788,
            'timestamp': 1526639109,
            'upload_date': '20180518',
        },
    }, {
        'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
        'only_matching': True,
    }, {
        'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
        'only_matching': True,
    }, {
        'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src URLs of all expressen.se player iframes in webpage."""
        iframe_re = r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1'
        return [match.group('url') for match in re.finditer(iframe_re, webpage)]

    def _real_extract(self, url):
        """Download the page and build the info dict from its data-* attributes."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        def read_data_attribute(name):
            # The attribute value is HTML-escaped JSON.
            raw_value = self._search_regex(
                r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
                webpage, 'info', group='value')
            return self._parse_json(
                raw_value, display_id, transform_source=unescapeHTML)

        info = read_data_attribute('video-tracking-info')
        video_id = info['videoId']

        data = read_data_attribute('article-data')
        stream = data['stream']

        if determine_ext(stream) != 'm3u8':
            # Anything that is not an HLS manifest is used as a direct URL.
            formats = [{
                'url': stream,
            }]
        else:
            formats = self._extract_m3u8_formats(
                stream, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        self._sort_formats(formats)

        # Tracking info is preferred; article data is the fallback.
        title = info.get('titleRaw') or data['title']
        seconds = (info.get('videoTotalSecondsDuration')
                   or data.get('totalSecondsDuration'))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('descriptionRaw'),
            'thumbnail': info.get('socialMediaImage') or data.get('image'),
            'duration': int_or_none(seconds),
            'timestamp': unified_timestamp(info.get('publishDate')),
            'formats': formats,
        }
|
@@ -44,6 +44,7 @@ from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
from .aliexpress import AliExpressLiveIE
|
||||
from .apa import APAIE
|
||||
from .aparat import AparatIE
|
||||
from .appleconnect import AppleConnectIE
|
||||
from .appletrailers import (
|
||||
@@ -53,6 +54,7 @@ from .appletrailers import (
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
)
|
||||
@@ -86,11 +88,7 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
@@ -117,6 +115,10 @@ from .bilibili import (
|
||||
BiliBiliBangumiIE,
|
||||
)
|
||||
from .biobiochiletv import BioBioChileTVIE
|
||||
from .bitchute import (
|
||||
BitChuteIE,
|
||||
BitChuteChannelIE,
|
||||
)
|
||||
from .biqle import BIQLEIE
|
||||
from .bleacherreport import (
|
||||
BleacherReportIE,
|
||||
@@ -137,6 +139,7 @@ from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .businessinsider import BusinessInsiderIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
from .c56 import C56IE
|
||||
@@ -144,6 +147,8 @@ from .camdemy import (
|
||||
CamdemyIE,
|
||||
CamdemyFolderIE
|
||||
)
|
||||
from .cammodels import CamModelsIE
|
||||
from .camtube import CamTubeIE
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
@@ -189,17 +194,25 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .ciscolive import (
|
||||
CiscoLiveSessionIE,
|
||||
CiscoLiveSearchIE,
|
||||
)
|
||||
from .cjsw import CJSWIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import CNBCIE
|
||||
from .cnbc import (
|
||||
CNBCIE,
|
||||
CNBCVideoIE,
|
||||
)
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
@@ -281,6 +294,7 @@ from .drtv import (
|
||||
DRTVIE,
|
||||
DRTVLiveIE,
|
||||
)
|
||||
from .dtube import DTubeIE
|
||||
from .dvtv import DVTVIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
@@ -329,6 +343,7 @@ from .esri import EsriVideoIE
|
||||
from .europa import EuropaIE
|
||||
from .everyonesmixtape import EveryonesMixtapeIE
|
||||
from .expotv import ExpoTVIE
|
||||
from .expressen import ExpressenIE
|
||||
from .extremetube import ExtremeTubeIE
|
||||
from .eyedotv import EyedoTVIE
|
||||
from .facebook import (
|
||||
@@ -366,7 +381,6 @@ from .foxgay import FoxgayIE
|
||||
from .foxnews import (
|
||||
FoxNewsIE,
|
||||
FoxNewsArticleIE,
|
||||
FoxNewsInsiderIE,
|
||||
)
|
||||
from .foxsports import FoxSportsIE
|
||||
from .franceculture import FranceCultureIE
|
||||
@@ -376,6 +390,7 @@ from .francetv import (
|
||||
FranceTVSiteIE,
|
||||
FranceTVEmbedIE,
|
||||
FranceTVInfoIE,
|
||||
FranceTVInfoSportIE,
|
||||
FranceTVJeunesseIE,
|
||||
GenerationWhatIE,
|
||||
CultureboxIE,
|
||||
@@ -383,6 +398,11 @@ from .francetv import (
|
||||
from .freesound import FreesoundIE
|
||||
from .freespeech import FreespeechIE
|
||||
from .freshlive import FreshLiveIE
|
||||
from .frontendmasters import (
|
||||
FrontendMastersIE,
|
||||
FrontendMastersLessonIE,
|
||||
FrontendMastersCourseIE
|
||||
)
|
||||
from .funimation import FunimationIE
|
||||
from .funk import (
|
||||
FunkMixIE,
|
||||
@@ -463,13 +483,11 @@ from .imdb import (
|
||||
from .imgur import (
|
||||
ImgurIE,
|
||||
ImgurAlbumIE,
|
||||
ImgurGalleryIE,
|
||||
)
|
||||
from .ina import InaIE
|
||||
from .inc import IncIE
|
||||
from .indavideo import (
|
||||
IndavideoIE,
|
||||
IndavideoEmbedIE,
|
||||
)
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .infoq import InfoQIE
|
||||
from .instagram import InstagramIE, InstagramUserIE
|
||||
from .internazionale import InternazionaleIE
|
||||
@@ -477,7 +495,10 @@ from .internetvideoarchive import InternetVideoArchiveIE
|
||||
from .iprima import IPrimaIE
|
||||
from .iqiyi import IqiyiIE
|
||||
from .ir90tv import Ir90TvIE
|
||||
from .itv import ITVIE
|
||||
from .itv import (
|
||||
ITVIE,
|
||||
ITVBTCCIE,
|
||||
)
|
||||
from .ivi import (
|
||||
IviIE,
|
||||
IviCompilationIE
|
||||
@@ -504,6 +525,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
@@ -522,6 +544,7 @@ from .la7 import LA7IE
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
@@ -531,6 +554,10 @@ from .lcp import (
|
||||
)
|
||||
from .learnr import LearnrIE
|
||||
from .lecture2go import Lecture2GoIE
|
||||
from .lecturio import (
|
||||
LecturioIE,
|
||||
LecturioCourseIE,
|
||||
)
|
||||
from .leeco import (
|
||||
LeIE,
|
||||
LePlaylistIE,
|
||||
@@ -551,6 +578,10 @@ from .limelight import (
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
)
|
||||
from .litv import LiTVIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
@@ -576,13 +607,16 @@ from .mailru import (
|
||||
MailRuMusicIE,
|
||||
MailRuMusicSearchIE,
|
||||
)
|
||||
from .makerschannel import MakersChannelIE
|
||||
from .makertv import MakerTVIE
|
||||
from .mangomolo import (
|
||||
MangomoloVideoIE,
|
||||
MangomoloLiveIE,
|
||||
)
|
||||
from .manyvids import ManyVidsIE
|
||||
from .markiza import (
|
||||
MarkizaIE,
|
||||
MarkizaPageIE,
|
||||
)
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mdr import MDRIE
|
||||
@@ -619,7 +653,6 @@ from .mnet import MnetIE
|
||||
from .moevideo import MoeVideoIE
|
||||
from .mofosex import MofosexIE
|
||||
from .mojvideo import MojvideoIE
|
||||
from .moniker import MonikerIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
@@ -640,6 +673,7 @@ from .mtv import (
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .musicplayon import MusicPlayOnIE
|
||||
from .mwave import MwaveIE, MwaveMeetGreetIE
|
||||
from .mychannels import MyChannelsIE
|
||||
from .myspace import MySpaceIE, MySpaceAlbumIE
|
||||
from .myspass import MySpassIE
|
||||
from .myvi import (
|
||||
@@ -661,6 +695,7 @@ from .nbc import (
|
||||
NBCOlympicsIE,
|
||||
NBCOlympicsStreamIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsStreamIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
from .ndr import (
|
||||
@@ -700,12 +735,7 @@ from .nexx import (
|
||||
from .nfb import NFBIE
|
||||
from .nfl import NFLIE
|
||||
from .nhk import NhkVodIE
|
||||
from .nhl import (
|
||||
NHLVideocenterIE,
|
||||
NHLNewsIE,
|
||||
NHLVideocenterCategoryIE,
|
||||
NHLIE,
|
||||
)
|
||||
from .nhl import NHLIE
|
||||
from .nick import (
|
||||
NickIE,
|
||||
NickBrIE,
|
||||
@@ -714,10 +744,7 @@ from .nick import (
|
||||
NickRuIE,
|
||||
)
|
||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaStackIE,
|
||||
NineCNineMediaIE,
|
||||
)
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
@@ -728,7 +755,10 @@ from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
@@ -760,7 +790,9 @@ from .nrk import (
|
||||
NRKSkoleIE,
|
||||
NRKTVIE,
|
||||
NRKTVDirekteIE,
|
||||
NRKTVEpisodeIE,
|
||||
NRKTVEpisodesIE,
|
||||
NRKTVSeasonIE,
|
||||
NRKTVSeriesIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@@ -805,6 +837,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .people import PeopleIE
|
||||
from .performgroup import PerformGroupIE
|
||||
from .periscope import (
|
||||
@@ -849,6 +882,10 @@ from .pornhub import (
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .puhutv import (
|
||||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .presstv import PressTVIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
@@ -880,7 +917,10 @@ from .rai import (
|
||||
RaiPlayPlaylistIE,
|
||||
RaiIE,
|
||||
)
|
||||
from .raywenderlich import RayWenderlichIE
|
||||
from .raywenderlich import (
|
||||
RayWenderlichIE,
|
||||
RayWenderlichCourseIE,
|
||||
)
|
||||
from .rbmaradio import RBMARadioIE
|
||||
from .rds import RDSIE
|
||||
from .redbulltv import RedBullTVIE
|
||||
@@ -1010,10 +1050,13 @@ from .spankbang import SpankBangIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
from .spike import SpikeIE
|
||||
from .spike import (
|
||||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@@ -1033,6 +1076,7 @@ from .stretchinternet import StretchInternetIE
|
||||
from .sunporno import SunPornoIE
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPageIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
@@ -1047,6 +1091,10 @@ from .tass import TassIE
|
||||
from .tastytrade import TastyTradeIE
|
||||
from .tbs import TBSIE
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
TeachableCourseIE,
|
||||
)
|
||||
from .teachertube import (
|
||||
TeacherTubeIE,
|
||||
TeacherTubeUserIE,
|
||||
@@ -1055,6 +1103,7 @@ from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tele5 import Tele5IE
|
||||
from .tele13 import Tele13IE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
@@ -1084,6 +1133,10 @@ from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
from .thisoldhouse import ThisOldHouseIE
|
||||
from .threeqsdn import ThreeQSDNIE
|
||||
from .tiktok import (
|
||||
TikTokIE,
|
||||
TikTokUserIE,
|
||||
)
|
||||
from .tinypic import TinyPicIE
|
||||
from .tmz import (
|
||||
TMZIE,
|
||||
@@ -1121,7 +1174,6 @@ from .tv2 import (
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
@@ -1136,6 +1188,7 @@ from .tvc import (
|
||||
from .tvigle import TvigleIE
|
||||
from .tvland import TVLandIE
|
||||
from .tvn24 import TVN24IE
|
||||
from .tvnet import TVNetIE
|
||||
from .tvnoe import TVNoeIE
|
||||
from .tvnow import (
|
||||
TVNowIE,
|
||||
@@ -1150,12 +1203,14 @@ from .tvp import (
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
TVPlayHomeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import TwitCastingIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@@ -1189,10 +1244,6 @@ from .uplynk import (
|
||||
UplynkIE,
|
||||
UplynkPreplayIE,
|
||||
)
|
||||
from .upskill import (
|
||||
UpskillIE,
|
||||
UpskillCourseIE,
|
||||
)
|
||||
from .urort import UrortIE
|
||||
from .urplay import URPlayIE
|
||||
from .usanetwork import USANetworkIE
|
||||
@@ -1261,6 +1312,7 @@ from .vimeo import (
|
||||
VimeoReviewIE,
|
||||
VimeoUserIE,
|
||||
VimeoWatchLaterIE,
|
||||
VHXEmbedIE,
|
||||
)
|
||||
from .vimple import VimpleIE
|
||||
from .vine import (
|
||||
@@ -1271,6 +1323,7 @@ from .viki import (
|
||||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
ViuPlaylistIE,
|
||||
@@ -1347,6 +1400,7 @@ from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
@@ -1396,6 +1450,7 @@ from .younow import (
|
||||
YouNowMomentIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
@@ -1419,10 +1474,23 @@ from .youtube import (
|
||||
from .zapiks import ZapiksIE
|
||||
from .zaq1 import Zaq1IE
|
||||
from .zattoo import (
|
||||
BBVTVIE,
|
||||
EinsUndEinsTVIE,
|
||||
EWETVIE,
|
||||
GlattvisionTVIE,
|
||||
MNetTVIE,
|
||||
MyVisionTVIE,
|
||||
NetPlusIE,
|
||||
OsnatelTVIE,
|
||||
QuantumTVIE,
|
||||
QuicklineIE,
|
||||
QuicklineLiveIE,
|
||||
SAKTVIE,
|
||||
VTXTVIE,
|
||||
WalyTVIE,
|
||||
ZattooIE,
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
@@ -20,6 +20,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
parse_count,
|
||||
sanitized_Request,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
@@ -56,6 +57,7 @@ class FacebookIE(InfoExtractor):
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
@@ -74,7 +76,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '274175099429670',
|
||||
'ext': 'mp4',
|
||||
'title': 'Asif Nawab Butt posted a video to his Timeline.',
|
||||
'title': 're:^Asif Nawab Butt posted a video',
|
||||
'uploader': 'Asif Nawab Butt',
|
||||
'upload_date': '20140506',
|
||||
'timestamp': 1399398998,
|
||||
@@ -132,7 +134,7 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
# have 1080P, but only up to 720p in swf params
|
||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||
'md5': '0d9813160b146b3bc8744e006027fcc6',
|
||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||
'info_dict': {
|
||||
'id': '10155529876156509',
|
||||
'ext': 'mp4',
|
||||
@@ -141,6 +143,7 @@ class FacebookIE(InfoExtractor):
|
||||
'upload_date': '20161030',
|
||||
'uploader': 'CNN',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||
@@ -148,7 +151,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1417995061575415',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
|
||||
'title': 'md5:1db063d6a8c13faa8da727817339c857',
|
||||
'timestamp': 1486648217,
|
||||
'upload_date': '20170209',
|
||||
'uploader': 'Yaroslav Korpan',
|
||||
@@ -175,7 +178,7 @@ class FacebookIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '1396382447100162',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
|
||||
'title': 'md5:19a428bbde91364e3de815383b54a235',
|
||||
'timestamp': 1486035494,
|
||||
'upload_date': '20170202',
|
||||
'uploader': 'Elisabeth Ahtn',
|
||||
@@ -208,6 +211,17 @@ class FacebookIE(InfoExtractor):
|
||||
# no title
|
||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
||||
'info_dict': {
|
||||
'id': '359649331226507',
|
||||
'ext': 'mp4',
|
||||
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
|
||||
'uploader': 'ESL One Dota 2',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -226,7 +240,7 @@ class FacebookIE(InfoExtractor):
|
||||
return urls
|
||||
|
||||
def _login(self):
|
||||
(useremail, password) = self._get_login_info()
|
||||
useremail, password = self._get_login_info()
|
||||
if useremail is None:
|
||||
return
|
||||
|
||||
@@ -312,16 +326,18 @@ class FacebookIE(InfoExtractor):
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||
|
||||
def extract_from_jsmods_instances(js_data):
|
||||
if js_data:
|
||||
return extract_video_data(try_get(
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
|
||||
webpage, 'js data', default='{}'),
|
||||
video_id, transform_source=js_to_json, fatal=False)
|
||||
if server_js_data:
|
||||
video_data = extract_video_data(try_get(
|
||||
server_js_data, lambda x: x['jsmods']['instances'],
|
||||
list) or [])
|
||||
video_data = extract_from_jsmods_instances(server_js_data)
|
||||
|
||||
if not video_data:
|
||||
if not fatal_if_no_video:
|
||||
@@ -333,8 +349,35 @@ class FacebookIE(InfoExtractor):
|
||||
expected=True)
|
||||
elif '>You must log in to continue' in webpage:
|
||||
self.raise_login_required()
|
||||
else:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
# Video info not in first request, do a secondary request using
|
||||
# tahoe player specific URL
|
||||
tahoe_data = self._download_webpage(
|
||||
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
|
||||
data=urlencode_postdata({
|
||||
'__a': 1,
|
||||
'__pc': self._search_regex(
|
||||
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
|
||||
'pkg cohort', default='PHASED:DEFAULT'),
|
||||
'__rev': self._search_regex(
|
||||
r'client_revision["\']\s*:\s*(\d+),', webpage,
|
||||
'client revision', default='3944515'),
|
||||
'fb_dtsg': self._search_regex(
|
||||
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
|
||||
webpage, 'dtsg token', default=''),
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
tahoe_js_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
|
||||
'tahoe js data', default='{}'),
|
||||
video_id, fatal=False)
|
||||
video_data = extract_from_jsmods_instances(tahoe_js_data)
|
||||
|
||||
if not video_data:
|
||||
raise ExtractorError('Cannot parse data')
|
||||
|
||||
formats = []
|
||||
for f in video_data:
|
||||
@@ -380,12 +423,17 @@ class FacebookIE(InfoExtractor):
|
||||
video_title = 'Facebook video #%s' % video_id
|
||||
uploader = clean_html(get_element_by_id(
|
||||
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
|
||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
|
||||
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
|
||||
fatal=False) or self._og_search_title(webpage, fatal=False)
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
view_count = parse_count(self._search_regex(
|
||||
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
|
||||
default=None))
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
@@ -393,6 +441,7 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': uploader,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail,
|
||||
'view_count': view_count,
|
||||
}
|
||||
|
||||
return webpage, info_dict
|
||||
|
@@ -46,7 +46,7 @@ class FC2IE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
return False
|
||||
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = f.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
src = url_or_none(f.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
|
@@ -3,15 +3,45 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FourTubeBaseIE(InfoExtractor):
|
||||
_TKN_HOST = 'tkn.kodicdn.com'
|
||||
|
||||
def _extract_formats(self, url, video_id, media_id, sources):
    """Fetch stream tokens for *sources* and return the sorted format list.

    *sources* is a list of height strings (e.g. ``'720'``); they are joined
    with ``+`` into the token URL on ``self._TKN_HOST``.  The JSON response
    is indexed by the same height strings, and each entry's ``token`` value
    is used as the format URL.
    """
    token_url = 'https://%s/%s/desktop/%s' % (
        self._TKN_HOST, media_id, '+'.join(sources))

    # Origin and Referer are derived from the page URL being extracted.
    page = compat_urlparse.urlparse(url)
    request_headers = {
        'Origin': '%s://%s' % (page.scheme, page.hostname),
        'Referer': url,
    }
    # An empty request body is passed explicitly
    # (presumably to make this a POST - confirm against _download_json).
    tokens = self._download_json(
        token_url, video_id, data=b'', headers=request_headers)

    formats = []
    for height in sources:
        formats.append({
            'url': tokens[height]['token'],
            'format_id': height + 'p',
            'resolution': height + 'p',
            'quality': int(height),
        })
    self._sort_formats(formats)
    return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
|
||||
@@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor):
|
||||
media_id = params[0]
|
||||
sources = ['%s' % p for p in params[2]]
|
||||
|
||||
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||
media_id, '+'.join(sources))
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
tokens = self._download_json(token_url, video_id, data=b'', headers={
|
||||
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
|
||||
'Referer': url,
|
||||
})
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
'format_id': format + 'p',
|
||||
'resolution': format + 'p',
|
||||
'quality': int(format),
|
||||
} for format in sources]
|
||||
self._sort_formats(formats)
|
||||
formats = self._extract_formats(url, video_id, media_id, sources)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE):
|
||||
class PornTubeIE(FourTubeBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
|
||||
_URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
|
||||
_TKN_HOST = 'tkn.porntube.com'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
|
||||
'info_dict': {
|
||||
@@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Teen couple doing anal',
|
||||
'uploader': 'Alexy',
|
||||
'uploader_id': 'Alexy',
|
||||
'uploader_id': '91488',
|
||||
'upload_date': '20150606',
|
||||
'timestamp': 1433595647,
|
||||
'duration': 5052,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
|
||||
'info_dict': {
|
||||
'id': '1331406',
|
||||
'ext': 'mp4',
|
||||
'title': 'Squirting Teen Ballerina on ECG',
|
||||
'uploader': 'Exploited College Girls',
|
||||
'uploader_id': '665',
|
||||
'channel': 'Exploited College Girls',
|
||||
'channel_id': '665',
|
||||
'upload_date': '20130920',
|
||||
'timestamp': 1379685485,
|
||||
'duration': 851,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
@@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a PornTube video from the page's embedded initial state.

    The page carries an ``INITIALSTATE`` string that is base64-encoded,
    then percent-encoded JSON; the video record lives under
    ``['page']['video']``.
    """
    match = re.match(self._VALID_URL, url)
    video_id = match.group('id')
    display_id = match.group('display_id')

    webpage = self._download_webpage(url, display_id)

    def decode_state(encoded):
        # base64 -> UTF-8 text -> percent-decoded JSON source
        return compat_urllib_parse_unquote(
            compat_b64decode(encoded).decode('utf-8'))

    encoded_state = self._search_regex(
        r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
        webpage, 'data', group='value')
    state = self._parse_json(
        encoded_state, video_id, transform_source=decode_state)
    video = state['page']['video']

    title = video['title']
    media_id = video['mediaId']

    # Only encodings that declare a height can be requested as a source.
    sources = []
    for encoding in video['encodings']:
        if encoding.get('height'):
            sources.append(compat_str(encoding['height']))
    formats = self._extract_formats(url, video_id, media_id, sources)

    uploader = try_get(video, lambda x: x['user']['username'], compat_str)
    uploader_id = str_or_none(
        try_get(video, lambda x: x['user']['id'], int))
    channel = try_get(video, lambda x: x['channel']['name'], compat_str)
    channel_id = str_or_none(
        try_get(video, lambda x: x['channel']['id'], int))

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'thumbnail': url_or_none(video.get('masterThumb')),
        # Fall back to the channel when no user is attached to the video.
        'uploader': uploader or channel,
        'uploader_id': uploader_id or channel_id,
        'channel': channel,
        'channel_id': channel_id,
        'timestamp': unified_timestamp(video.get('publishedAt')),
        'like_count': int_or_none(video.get('likes')),
        'dislike_count': int_or_none(video.get('dislikes')),
        'view_count': int_or_none(video.get('playsQty')),
        'duration': int_or_none(video.get('durationInSeconds')),
        'age_limit': 18,
    }
|
||||
|
||||
|
||||
class PornerBrosIE(FourTubeBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
|
||||
|
@@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||
|
||||
@@ -68,21 +76,41 @@ class FoxNewsIE(AMPIE):
|
||||
|
||||
|
||||
class FoxNewsArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:article'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# data-video-id
|
||||
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
|
||||
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
|
||||
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
|
||||
'info_dict': {
|
||||
'id': '5116295019001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Trump and Clinton asked to defend positions on Iraq War',
|
||||
'description': 'Veterans react on \'The Kelly File\'',
|
||||
'timestamp': 1473299755,
|
||||
'timestamp': 1473301045,
|
||||
'upload_date': '20160908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
# iframe embed
|
||||
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
|
||||
'info_dict': {
|
||||
'id': '5748266721001',
|
||||
'ext': 'flv',
|
||||
'title': 'Kyle Kashuv has a positive message for the Trump White House',
|
||||
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 229,
|
||||
'timestamp': 1520594670,
|
||||
'upload_date': '20180309',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@@ -90,51 +118,10 @@ class FoxNewsArticleIE(InfoExtractor):
|
||||
|
||||
video_id = self._html_search_regex(
|
||||
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
|
||||
webpage, 'video ID', group='id')
|
||||
webpage, 'video ID', group='id', default=None)
|
||||
if video_id:
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
|
||||
|
||||
return self.url_result(
|
||||
'http://video.foxnews.com/v/' + video_id,
|
||||
FoxNewsIE.ie_key())
|
||||
|
||||
|
||||
class FoxNewsInsiderIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
|
||||
IE_NAME = 'foxnews:insider'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'md5': 'a10c755e582d28120c62749b4feb4c0c',
|
||||
'info_dict': {
|
||||
'id': '5099377331001',
|
||||
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
|
||||
'ext': 'mp4',
|
||||
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
|
||||
'description': 'Is campus censorship getting out of control?',
|
||||
'timestamp': 1472168725,
|
||||
'upload_date': '20160825',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FoxNewsIE.ie_key()],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': FoxNewsIE.ie_key(),
|
||||
'url': embed_url,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
}
|
||||
FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
|
||||
|
@@ -1,43 +1,33 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'bwduI3X_TgUB',
|
||||
'id': '432609859715',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
'upload_date': '20150423',
|
||||
'timestamp': 1429761109,
|
||||
# TODO: fix timestamp
|
||||
'upload_date': '19700101', # '20150423',
|
||||
# 'timestamp': 1429761109,
|
||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
||||
webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
config['releaseURL'], {
|
||||
'mbr': 'true',
|
||||
'switch': 'http',
|
||||
}), {'force_smil_url': True}))
|
||||
return self.url_result(
|
||||
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
|
||||
|
@@ -16,6 +16,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from .dailymotion import DailymotionIE
|
||||
|
||||
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
|
||||
|
||||
def sign(manifest_url, manifest_id):
|
||||
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
|
||||
signed_url = self._download_webpage(
|
||||
signed_url = url_or_none(self._download_webpage(
|
||||
'https://%s/esi/TA' % host, video_id,
|
||||
'Downloading signed %s manifest URL' % manifest_id,
|
||||
fatal=False, query={
|
||||
'url': manifest_url,
|
||||
})
|
||||
if (signed_url and isinstance(signed_url, compat_str) and
|
||||
re.search(r'^(?:https?:)?//', signed_url)):
|
||||
}))
|
||||
if signed_url:
|
||||
return signed_url
|
||||
return manifest_url
|
||||
|
||||
@@ -379,6 +379,31 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||
return self._make_url_result(video_id, catalogue)
|
||||
|
||||
|
||||
class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
    """Extractor for sport.francetvinfo.fr pages.

    Reads the video identifier from the page's ``data-video`` attribute
    and hands it to ``_make_url_result`` with the 'Sport-web' catalogue.
    """

    IE_NAME = 'sport.francetvinfo.fr'
    _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
        'info_dict': {
            'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
            'ext': 'mp4',
            'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
            'timestamp': 1523639962,
            'upload_date': '20180413',
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [FranceTVIE.ie_key()],
    }]

    def _real_extract(self, url):
        # The URL slug is only used as a display id while downloading.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        return self._make_url_result(
            self._search_regex(r'data-video="([^"]+)"', page, 'video_id'),
            'Sport-web')
|
||||
|
||||
|
||||
class GenerationWhatIE(InfoExtractor):
|
||||
IE_NAME = 'france2.fr:generation-what'
|
||||
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||
|
263
youtube_dl/extractor/frontendmasters.py
Normal file
263
youtube_dl/extractor/frontendmasters.py
Normal file
@@ -0,0 +1,263 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class FrontendMastersBaseIE(InfoExtractor):
    """Common constants and login handling for the FrontendMasters extractors."""

    _API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
    _LOGIN_URL = 'https://frontendmasters.com/login/'

    _NETRC_MACHINE = 'frontendmasters'

    # Frame size associated with each quality label.
    _QUALITIES = {
        'low': {'width': 480, 'height': 360},
        'mid': {'width': 1280, 'height': 720},
        'high': {'width': 1920, 'height': 1080}
    }

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Submit the login form when credentials are available.

        Returns silently when no username is configured or when the
        response contains a logout marker; raises ExtractorError otherwise.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # Start from the hidden inputs of the page and add the credentials.
        form_data = self._hidden_inputs(login_page)
        form_data['username'] = username
        form_data['password'] = password

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post_url', default=self._LOGIN_URL, group='url')
        if not post_url.startswith('http'):
            # The form action is not absolute; resolve it against the login URL.
            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)

        response = self._download_webpage(
            post_url, None, 'Logging in',
            data=urlencode_postdata(form_data),
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

        # Either marker in the response is taken as a successful login.
        for marker in ('wp-login.php?action=logout', '>Logout'):
            if marker in response:
                return

        error = self._html_search_regex(
            r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
            response, 'error message', default=None, group='error')
        if error:
            raise ExtractorError('Unable to login: %s' % error, expected=True)
        raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
    """Base class for extractors that work from the course JSON."""

    def _download_course(self, course_name, url):
        """Download the course JSON from the API, sending `url` as Referer."""
        return self._download_json(
            '%s/courses/%s' % (self._API_BASE, course_name), course_name,
            'Downloading course JSON', headers={'Referer': url})

    @staticmethod
    def _extract_chapters(course):
        """Return the chapter titles listed in course['lessonElements'].

        Only string entries are kept. Chapter titles are plain text (e.g.
        'Introduction'), not URLs, so they must not be filtered through
        url_or_none, which would discard every one of them.
        NOTE(review): non-string entries are presumably lesson references;
        confirm against the API response.
        """
        lesson_elements = course.get('lessonElements')
        if not isinstance(lesson_elements, list):
            return []
        return [e for e in lesson_elements if isinstance(e, compat_str)]

    @staticmethod
    def _extract_lesson(chapters, lesson_id, lesson):
        """Build a url_transparent result for one lesson.

        chapters  -- list of chapter titles as returned by _extract_chapters
        lesson_id -- identifier used for the 'frontendmasters:<id>' URL
        lesson    -- lesson metadata dict taken from the course JSON
        """
        title = lesson.get('title') or lesson_id
        display_id = lesson.get('slug')
        description = lesson.get('description')
        thumbnail = lesson.get('thumbnail')

        # Both stay None unless the two indices are present and consistent;
        # previously a missing index made the chapter lookup itself fail.
        chapter = None
        chapter_number = None
        index = lesson.get('index')
        element_index = lesson.get('elementIndex')
        if (isinstance(index, int) and isinstance(element_index, int) and
                index < element_index):
            chapter_number = element_index - index
            if chapter_number - 1 < len(chapters):
                chapter = chapters[chapter_number - 1]

        duration = None
        timestamp = lesson.get('timestamp')
        if isinstance(timestamp, compat_str):
            # The timestamp is matched as '<start> - <end>', both H:M:S.
            mobj = re.search(
                r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
                timestamp)
            if mobj:
                start = parse_duration(mobj.group('start'))
                end = parse_duration(mobj.group('end'))
                # Guard the subtraction in case either bound fails to parse.
                if start is not None and end is not None:
                    duration = end - start

        return {
            '_type': 'url_transparent',
            'url': 'frontendmasters:%s' % lesson_id,
            'ie_key': FrontendMastersIE.ie_key(),
            'id': lesson_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'chapter': chapter,
            'chapter_number': chapter_number,
        }
|
||||
|
||||
|
||||
class FrontendMastersIE(FrontendMastersBaseIE):
    """Extractor for a single FrontendMasters video, addressed by lesson id."""

    _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
        'md5': '7f161159710d6b7016a4f4af6fcb05e2',
        'info_dict': {
            'id': 'a2qogef6ba',
            'ext': 'mp4',
            'title': 'a2qogef6ba',
        },
        'skip': 'Requires FrontendMasters account credentials',
    }, {
        'url': 'frontendmasters:a2qogef6ba',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the source endpoint once per container/quality pair."""
        lesson_id = self._match_id(url)

        source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)

        formats = []
        for ext in ('webm', 'mp4'):
            for quality in ('low', 'mid', 'high'):
                resolution = self._QUALITIES[quality]
                format_id = '%s-%s' % (ext, quality)
                source = self._download_json(
                    source_url, lesson_id,
                    'Downloading %s source JSON' % format_id, query={
                        'f': ext,
                        'r': resolution['height'],
                    }, headers={
                        'Referer': url,
                    }, fatal=False)

                # With fatal=False a failed request does not return a dict,
                # so check the type before reading 'url'. Subscripting the
                # result directly would raise and abort the other formats.
                format_url = (
                    source.get('url') if isinstance(source, dict) else None)
                if not format_url:
                    continue

                # Copy so the shared _QUALITIES entry is never mutated.
                f = resolution.copy()
                f.update({
                    'url': format_url,
                    'ext': ext,
                    'format_id': format_id,
                })
                formats.append(f)
        self._sort_formats(formats)

        subtitles = {
            'en': [{
                'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
            }]
        }

        return {
            'id': lesson_id,
            'title': lesson_id,
            'formats': formats,
            'subtitles': subtitles
        }
|
||||
|
||||
|
||||
class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
    """Extractor for a single lesson page of a FrontendMasters course."""

    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
    _TEST = {
        'url': 'https://frontendmasters.com/courses/web-development/tools',
        'info_dict': {
            'id': 'a2qogef6ba',
            'display_id': 'tools',
            'ext': 'mp4',
            'title': 'Tools',
            'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
            'thumbnail': r're:^https?://.*\.jpg$',
            'chapter': 'Introduction',
            'chapter_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires FrontendMasters account credentials',
    }

    def _real_extract(self, url):
        """Find the lesson whose slug matches the URL and delegate to it.

        Raises ExtractorError when the course JSON has no lesson with the
        requested slug.
        """
        mobj = re.match(self._VALID_URL, url)
        course_name, lesson_name = mobj.group('course_name', 'lesson_name')

        course = self._download_course(course_name, url)

        # Supply a default so an unknown slug does not leak StopIteration.
        lesson_id, lesson = next(
            ((video_id, data)
             for video_id, data in course['lessonData'].items()
             if data.get('slug') == lesson_name),
            (None, None))
        if lesson is None:
            raise ExtractorError(
                'Unable to find lesson %s in course %s'
                % (lesson_name, course_name))

        chapters = self._extract_chapters(course)
        return self._extract_lesson(chapters, lesson_id, lesson)
|
||||
|
||||
|
||||
class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
    """Extractor that turns a FrontendMasters course into a playlist."""

    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
    _TEST = {
        'url': 'https://frontendmasters.com/courses/web-development/',
        'info_dict': {
            'id': 'web-development',
            'title': 'Introduction to Web Development',
            'description': 'md5:9317e6e842098bf725d62360e52d49a6',
        },
        'playlist_count': 81,
        'skip': 'Requires FrontendMasters account credentials',
    }

    @classmethod
    def suitable(cls, url):
        # Lesson URLs also match _VALID_URL here, so defer to the lesson
        # extractor first. super() names this class rather than a
        # grandparent, so no class in the MRO is skipped.
        if FrontendMastersLessonIE.suitable(url):
            return False
        return super(FrontendMastersCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one url_transparent entry per lesson."""
        course_name = self._match_id(url)

        course = self._download_course(course_name, url)

        chapters = self._extract_chapters(course)

        lessons = sorted(
            course['lessonData'].values(), key=lambda data: data['index'])

        entries = []
        for lesson in lessons:
            lesson_name = lesson.get('slug')
            if not lesson_name:
                continue
            lesson_id = lesson.get('hash') or lesson.get('statsId')
            if not lesson_id:
                # Without an id the entry URL would be 'frontendmasters:None'.
                continue
            entries.append(self._extract_lesson(chapters, lesson_id, lesson))

        title = course.get('title')
        description = course.get('description')

        return self.playlist_result(entries, course_name, title, description)
|
@@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
username, password = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
try:
|
||||
|
@@ -1,10 +1,12 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
@@ -12,6 +14,19 @@ from ..utils import (
|
||||
|
||||
|
||||
class FunkBaseIE(InfoExtractor):
|
||||
_HEADERS = {
|
||||
'Accept': '*/*',
|
||||
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
|
||||
}
|
||||
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
|
||||
|
||||
@staticmethod
|
||||
def _make_headers(referer):
|
||||
headers = FunkBaseIE._HEADERS.copy()
|
||||
headers['Referer'] = referer
|
||||
return headers
|
||||
|
||||
def _make_url_result(self, video):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
@@ -48,19 +63,19 @@ class FunkMixIE(FunkBaseIE):
|
||||
|
||||
lists = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/curation/curatedLists/',
|
||||
mix_id, headers={
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
|
||||
'Referer': url,
|
||||
}, query={
|
||||
mix_id, headers=self._make_headers(url), query={
|
||||
'size': 100,
|
||||
})['result']['lists']
|
||||
})['_embedded']['curatedListList']
|
||||
|
||||
metas = next(
|
||||
l for l in lists
|
||||
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
|
||||
video = next(
|
||||
meta['videoDataDelegate']
|
||||
for meta in metas if meta.get('alias') == alias)
|
||||
for meta in metas
|
||||
if try_get(
|
||||
meta, lambda x: x['videoDataDelegate']['alias'],
|
||||
compat_str) == alias)
|
||||
|
||||
return self._make_url_result(video)
|
||||
|
||||
@@ -104,25 +119,53 @@ class FunkChannelIE(FunkBaseIE):
|
||||
channel_id = mobj.group('id')
|
||||
alias = mobj.group('alias')
|
||||
|
||||
headers = {
|
||||
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
|
||||
'Referer': url,
|
||||
}
|
||||
headers = self._make_headers(url)
|
||||
|
||||
video = None
|
||||
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
|
||||
headers=headers, query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
# Id-based channels are currently broken on their side: webplayer
|
||||
# tries to process them via byChannelAlias endpoint and fails
|
||||
# predictably.
|
||||
for page_num in itertools.count():
|
||||
by_channel_alias = self._download_json(
|
||||
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
|
||||
% channel_id,
|
||||
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
|
||||
headers=headers, query={
|
||||
'filterFsk': 'false',
|
||||
'sort': 'creationDate,desc',
|
||||
'size': 100,
|
||||
'page': page_num,
|
||||
}, fatal=False)
|
||||
if not by_channel_alias:
|
||||
break
|
||||
video_list = try_get(
|
||||
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
|
||||
if not video_list:
|
||||
break
|
||||
try:
|
||||
video = next(r for r in video_list if r.get('alias') == alias)
|
||||
break
|
||||
except StopIteration:
|
||||
pass
|
||||
if not try_get(
|
||||
by_channel_alias, lambda x: x['_links']['next']):
|
||||
break
|
||||
|
||||
if not video:
|
||||
by_id_list = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/byIdList',
|
||||
channel_id, 'Downloading byIdList JSON', headers=headers,
|
||||
query={
|
||||
'ids': alias,
|
||||
}, fatal=False)
|
||||
if by_id_list:
|
||||
video = try_get(by_id_list, lambda x: x['result'][0], dict)
|
||||
|
||||
if not video:
|
||||
results = self._download_json(
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
|
||||
headers=headers, query={
|
||||
'https://www.funk.net/api/v3.0/content/videos/filter',
|
||||
channel_id, 'Downloading filter JSON', headers=headers, query={
|
||||
'channelId': channel_id,
|
||||
'size': 100,
|
||||
})['result']
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user