mirror of
https://github.com/ytdl-org/youtube-dl
synced 2025-10-18 06:08:36 +09:00
Compare commits
458 Commits
2015.03.15
...
2015.04.28
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3a0f0c263a | ||
![]() |
2419a376b9 | ||
![]() |
e206740fd7 | ||
![]() |
290a5a8d85 | ||
![]() |
e2dc351d25 | ||
![]() |
c86b61428b | ||
![]() |
40b96352c9 | ||
![]() |
189ba90996 | ||
![]() |
c8183e661d | ||
![]() |
053c94f1b3 | ||
![]() |
b9d76a9571 | ||
![]() |
a01cfc2951 | ||
![]() |
4eb5c65bee | ||
![]() |
06d07c4000 | ||
![]() |
74f8654a53 | ||
![]() |
9e105a858c | ||
![]() |
cd8a07a764 | ||
![]() |
aa49acd15a | ||
![]() |
642f23bd81 | ||
![]() |
2e24e6bd17 | ||
![]() |
2a09c1b8ab | ||
![]() |
a5ebf77d87 | ||
![]() |
b874495b1f | ||
![]() |
b860f5dfd4 | ||
![]() |
b19fc36c81 | ||
![]() |
d2d8248f68 | ||
![]() |
f54bab4d67 | ||
![]() |
bf6427d2fb | ||
![]() |
672f1bd849 | ||
![]() |
529d26c3e1 | ||
![]() |
857f00ed94 | ||
![]() |
e4a5e772f2 | ||
![]() |
a542e372ab | ||
![]() |
0d1bd5d62f | ||
![]() |
9f3fa89f7c | ||
![]() |
92995e6265 | ||
![]() |
a4196c3ea5 | ||
![]() |
db37e0c273 | ||
![]() |
d0aefec99a | ||
![]() |
66be4b89d7 | ||
![]() |
870744ce8f | ||
![]() |
2ad978532b | ||
![]() |
5090d93f2c | ||
![]() |
c8ff645766 | ||
![]() |
25f7d1beba | ||
![]() |
09aa111918 | ||
![]() |
10fb7710e8 | ||
![]() |
c0ea8ebb9b | ||
![]() |
31fd9c7601 | ||
![]() |
ddbed36455 | ||
![]() |
a9b0d4e1f4 | ||
![]() |
4d6a3ff411 | ||
![]() |
7fb993e1f4 | ||
![]() |
02f502f435 | ||
![]() |
4515cb43ca | ||
![]() |
d740333224 | ||
![]() |
c610f38ba9 | ||
![]() |
6447353f52 | ||
![]() |
b46ed49996 | ||
![]() |
cd9fdccde0 | ||
![]() |
2a8137272d | ||
![]() |
762155cc90 | ||
![]() |
f8610ba1ca | ||
![]() |
c99f4098c4 | ||
![]() |
3eec9fef30 | ||
![]() |
8c8826176d | ||
![]() |
14a2d6789f | ||
![]() |
7513f298b0 | ||
![]() |
c04c3e334c | ||
![]() |
f8e51f60b3 | ||
![]() |
33b066bda0 | ||
![]() |
14f41bc2fb | ||
![]() |
008bee0f50 | ||
![]() |
29492f3332 | ||
![]() |
bc94bd510b | ||
![]() |
9dd8e46a2d | ||
![]() |
8be2bdfabd | ||
![]() |
b4c0806963 | ||
![]() |
cc38fa6cfb | ||
![]() |
6de5dbafee | ||
![]() |
60bf45c80d | ||
![]() |
eb0f3e7ec0 | ||
![]() |
ed553379df | ||
![]() |
5c1e6f69c4 | ||
![]() |
757cda0a96 | ||
![]() |
e94443de80 | ||
![]() |
0954cd8aa4 | ||
![]() |
da55dac047 | ||
![]() |
13a11b195f | ||
![]() |
92dcba1e1c | ||
![]() |
2fe1b5bd2a | ||
![]() |
f91e1a8739 | ||
![]() |
24e21613b6 | ||
![]() |
c6391cd587 | ||
![]() |
006ce15a0c | ||
![]() |
edf4216119 | ||
![]() |
ae8953409e | ||
![]() |
bda44f31a1 | ||
![]() |
6621ca39a3 | ||
![]() |
14f7abfa71 | ||
![]() |
0f0b5736da | ||
![]() |
6728187ac0 | ||
![]() |
17c8675853 | ||
![]() |
cfbee8a431 | ||
![]() |
736785ab63 | ||
![]() |
3ded7bac16 | ||
![]() |
b524a001d6 | ||
![]() |
7b071e317b | ||
![]() |
a380509259 | ||
![]() |
c0dea0a782 | ||
![]() |
70947ea7b1 | ||
![]() |
81cd954a51 | ||
![]() |
feccf29c87 | ||
![]() |
5b5fbc0867 | ||
![]() |
f158799bbe | ||
![]() |
8b0e8990c2 | ||
![]() |
880ee801cf | ||
![]() |
163965d861 | ||
![]() |
6e218b3f9a | ||
![]() |
cc9b9df0b6 | ||
![]() |
31f224008e | ||
![]() |
f32cb5cb14 | ||
![]() |
fec2d97ca2 | ||
![]() |
f2eeafb061 | ||
![]() |
8f4e8bf280 | ||
![]() |
cc36e2295a | ||
![]() |
d47aeb2252 | ||
![]() |
14523ed969 | ||
![]() |
592e97e855 | ||
![]() |
53faa3ca5f | ||
![]() |
c62566971f | ||
![]() |
902be27cf9 | ||
![]() |
bf12cbe07c | ||
![]() |
f52e66505a | ||
![]() |
ca75235d3d | ||
![]() |
ecc6bd1341 | ||
![]() |
ce81b1411d | ||
![]() |
7691a7a3bd | ||
![]() |
214e74bf6f | ||
![]() |
c5826a491b | ||
![]() |
d8e7ef04dc | ||
![]() |
08f2a92c9c | ||
![]() |
3220c50f9a | ||
![]() |
024ebb2706 | ||
![]() |
954352c4c0 | ||
![]() |
4aec95f3c9 | ||
![]() |
be531ef1ec | ||
![]() |
65c1a750f5 | ||
![]() |
5141249c59 | ||
![]() |
6225984681 | ||
![]() |
5cb91ceaa5 | ||
![]() |
89c09e2a08 | ||
![]() |
fbbb219409 | ||
![]() |
820b064804 | ||
![]() |
355c524bfa | ||
![]() |
c052ce6cde | ||
![]() |
c9a779695d | ||
![]() |
a685ae511a | ||
![]() |
5edea45fab | ||
![]() |
8afff9f849 | ||
![]() |
a2043572aa | ||
![]() |
5d98908b26 | ||
![]() |
d6fd958c5f | ||
![]() |
d0eb724e22 | ||
![]() |
afe4a8c769 | ||
![]() |
9fc03aa87c | ||
![]() |
c798f15b98 | ||
![]() |
2dcc114f84 | ||
![]() |
0dfe9bc9d2 | ||
![]() |
4d1cdb5bfe | ||
![]() |
9c5335a027 | ||
![]() |
ae849ca170 | ||
![]() |
94c1255782 | ||
![]() |
476e1095fa | ||
![]() |
8da1bb0418 | ||
![]() |
01c58f8473 | ||
![]() |
edfcf7abe2 | ||
![]() |
37b44fe7c1 | ||
![]() |
8f02ad4f12 | ||
![]() |
51f1244600 | ||
![]() |
7bd930368c | ||
![]() |
fb69240ca0 | ||
![]() |
830d53bfae | ||
![]() |
c36a959549 | ||
![]() |
e91b2d14e3 | ||
![]() |
ac58e68bc3 | ||
![]() |
504c1cedfe | ||
![]() |
9a4d8fae82 | ||
![]() |
7d2ba6394c | ||
![]() |
b04b94da5f | ||
![]() |
9933857f67 | ||
![]() |
ed5641e249 | ||
![]() |
a4257017ef | ||
![]() |
18153f1b32 | ||
![]() |
7a91d1fc43 | ||
![]() |
af14ded75e | ||
![]() |
65939effb5 | ||
![]() |
66ee7b3234 | ||
![]() |
cd47a628fc | ||
![]() |
d7c78decb0 | ||
![]() |
8749477ed0 | ||
![]() |
7088f5b5fa | ||
![]() |
5bb6328cb9 | ||
![]() |
ce9f47de99 | ||
![]() |
4c4780c25e | ||
![]() |
64f1aba8f1 | ||
![]() |
3359fb661f | ||
![]() |
58a9f1b864 | ||
![]() |
6ac41a4ef5 | ||
![]() |
aa2af7ba74 | ||
![]() |
ce73839fe4 | ||
![]() |
1dc2726f8d | ||
![]() |
af76e8174d | ||
![]() |
402a3efc92 | ||
![]() |
372f08c990 | ||
![]() |
dd29eb7f81 | ||
![]() |
bca788ab1d | ||
![]() |
aef8fdba11 | ||
![]() |
0a1603634b | ||
![]() |
a662163fd5 | ||
![]() |
bd7a6478a2 | ||
![]() |
4a20c9f628 | ||
![]() |
418c5cc3fc | ||
![]() |
cc55d08832 | ||
![]() |
de5c545648 | ||
![]() |
a35099bd33 | ||
![]() |
5f4b5cf044 | ||
![]() |
beb10f843f | ||
![]() |
29713e4268 | ||
![]() |
8e4b83b96b | ||
![]() |
ae603b500e | ||
![]() |
d97aae7572 | ||
![]() |
a55e2f04a0 | ||
![]() |
6e53c91608 | ||
![]() |
d2272fcf6e | ||
![]() |
c7ac5dce8c | ||
![]() |
5c1d459ae9 | ||
![]() |
2e7daef502 | ||
![]() |
6410229681 | ||
![]() |
e40bd5f06b | ||
![]() |
06b491eb7b | ||
![]() |
3a9fadd6df | ||
![]() |
0de9312a7e | ||
![]() |
27fe5e3473 | ||
![]() |
f67dcc09f5 | ||
![]() |
fefc9d121d | ||
![]() |
a319c33d8b | ||
![]() |
218d6bcc05 | ||
![]() |
7d25463972 | ||
![]() |
aff84bec07 | ||
![]() |
ac651e974e | ||
![]() |
e21a55abcc | ||
![]() |
bc03228ab5 | ||
![]() |
f05d0e73c6 | ||
![]() |
aed2d4b31e | ||
![]() |
184a197441 | ||
![]() |
ed676e8c0a | ||
![]() |
8e1f937473 | ||
![]() |
1a68d39211 | ||
![]() |
4ba7d5b14c | ||
![]() |
1a48181a9f | ||
![]() |
6b70a4eb7d | ||
![]() |
f01855813b | ||
![]() |
4a3cdf81af | ||
![]() |
f777397aca | ||
![]() |
8fb2e5a4f5 | ||
![]() |
4e8cc1e973 | ||
![]() |
ff02a228e3 | ||
![]() |
424266abb1 | ||
![]() |
3fde134791 | ||
![]() |
7c39a65543 | ||
![]() |
8cf70de428 | ||
![]() |
15ac8413c7 | ||
![]() |
79c21abba7 | ||
![]() |
d5c418f29f | ||
![]() |
536b94e56f | ||
![]() |
5c29dbd0c7 | ||
![]() |
ba9e68f402 | ||
![]() |
e9f65f8749 | ||
![]() |
ae0dd4b298 | ||
![]() |
f1ce35af1a | ||
![]() |
6e617ed0b6 | ||
![]() |
7cf97daf77 | ||
![]() |
3d24d997ae | ||
![]() |
115c281672 | ||
![]() |
cce23e43a9 | ||
![]() |
ff556f5c09 | ||
![]() |
16fa01291b | ||
![]() |
01534bf54f | ||
![]() |
cd341b6e06 | ||
![]() |
185a7e25e7 | ||
![]() |
e81a474603 | ||
![]() |
ff2be6e180 | ||
![]() |
3da4b31359 | ||
![]() |
4bbeb19fc7 | ||
![]() |
a9cbab1735 | ||
![]() |
6b7556a554 | ||
![]() |
a3c7019e06 | ||
![]() |
416b9c29f7 | ||
![]() |
2ec8e04cac | ||
![]() |
e03bfb30ce | ||
![]() |
f5b669113f | ||
![]() |
d08225edf4 | ||
![]() |
8075d4f99d | ||
![]() |
1a944d8a2a | ||
![]() |
7cf02b6619 | ||
![]() |
55cde6ef3c | ||
![]() |
69c3af567d | ||
![]() |
60e1fe0079 | ||
![]() |
4669393070 | ||
![]() |
ce3bfe5d57 | ||
![]() |
2a0c2ca2b8 | ||
![]() |
c89fbfb385 | ||
![]() |
facecb84a1 | ||
![]() |
ed06e9949b | ||
![]() |
e15307a612 | ||
![]() |
5cbb2699ee | ||
![]() |
a2edf2e7ff | ||
![]() |
1d31e7a2fc | ||
![]() |
a2a4d5fa31 | ||
![]() |
a28ccbabc6 | ||
![]() |
edd7344820 | ||
![]() |
c808ef81bb | ||
![]() |
fd203fe357 | ||
![]() |
5bb7ab9928 | ||
![]() |
87270c8416 | ||
![]() |
ebc2f7a2db | ||
![]() |
7700207ec7 | ||
![]() |
4d5d14f5cf | ||
![]() |
72b249bf1f | ||
![]() |
9b4774b21b | ||
![]() |
2ddf083588 | ||
![]() |
8343a03357 | ||
![]() |
ad320e9b83 | ||
![]() |
ecb750a446 | ||
![]() |
5f88e02818 | ||
![]() |
616af2f4b9 | ||
![]() |
5a3b315b5f | ||
![]() |
b7a2268e7b | ||
![]() |
20d729228c | ||
![]() |
af8c93086c | ||
![]() |
79fd11ab8e | ||
![]() |
cb88671e37 | ||
![]() |
ff79552f13 | ||
![]() |
643fe72717 | ||
![]() |
4747e2183a | ||
![]() |
c59e701e35 | ||
![]() |
8e678af4ba | ||
![]() |
70a1165b32 | ||
![]() |
af14000215 | ||
![]() |
998e6cdba0 | ||
![]() |
2315fb5e5f | ||
![]() |
157e9e5aa5 | ||
![]() |
c496ec0848 | ||
![]() |
15b67a268a | ||
![]() |
31c4809827 | ||
![]() |
ac0df2350a | ||
![]() |
223b27f46c | ||
![]() |
425142be60 | ||
![]() |
7e17ec8c71 | ||
![]() |
448830ce7b | ||
![]() |
8896b614a9 | ||
![]() |
a7fce980ad | ||
![]() |
91757b0f37 | ||
![]() |
fbfcc2972b | ||
![]() |
db40364b87 | ||
![]() |
094ce39c45 | ||
![]() |
ae67d082fe | ||
![]() |
8f76df7f37 | ||
![]() |
5c19d18cbf | ||
![]() |
838b93405b | ||
![]() |
2676caf344 | ||
![]() |
17941321ab | ||
![]() |
48c971e073 | ||
![]() |
f5e2efbbf0 | ||
![]() |
5d1f0e607b | ||
![]() |
b0872c19ea | ||
![]() |
9f790b9901 | ||
![]() |
c41a2ec4af | ||
![]() |
575dad3c98 | ||
![]() |
32d687f55e | ||
![]() |
93f787070f | ||
![]() |
f9544f6e8f | ||
![]() |
336d19044c | ||
![]() |
7866c9e173 | ||
![]() |
1a4123de04 | ||
![]() |
cf2e2eb1c0 | ||
![]() |
2051acdeb2 | ||
![]() |
cefdf970cc | ||
![]() |
a1d0aa7b88 | ||
![]() |
49aeedb8cb | ||
![]() |
ef249a2cd7 | ||
![]() |
a09141548a | ||
![]() |
5379a2d40d | ||
![]() |
c9450c7ab1 | ||
![]() |
faa1b5c292 | ||
![]() |
393d9fc6d2 | ||
![]() |
4e6a228689 | ||
![]() |
179d6678b1 | ||
![]() |
85698c5086 | ||
![]() |
a7d9ded45d | ||
![]() |
531980d89c | ||
![]() |
1887ecd4d6 | ||
![]() |
cd32c2caba | ||
![]() |
1c9a1457fc | ||
![]() |
038b0eb1da | ||
![]() |
f20bf146e2 | ||
![]() |
01218f919b | ||
![]() |
2684871bc1 | ||
![]() |
ccf3960eec | ||
![]() |
eecc0685c9 | ||
![]() |
2ed849eccf | ||
![]() |
3378d67a18 | ||
![]() |
f3c0c667a6 | ||
![]() |
0ae8bbac2d | ||
![]() |
cbc3cfcab4 | ||
![]() |
b30ef07c6c | ||
![]() |
73900846b1 | ||
![]() |
d1dc7e3991 | ||
![]() |
3073a6d5e9 | ||
![]() |
aae53774f2 | ||
![]() |
7a757b7194 | ||
![]() |
fa8ce26904 | ||
![]() |
2c2c06e359 | ||
![]() |
ee580538fa | ||
![]() |
c3c5c31517 | ||
![]() |
ed9a25dd61 | ||
![]() |
9ef4f12b53 | ||
![]() |
84f8101606 | ||
![]() |
b1337948eb | ||
![]() |
98f02fdde2 | ||
![]() |
048fdc2292 | ||
![]() |
2ca1c5aa9f | ||
![]() |
674fb0fcc5 | ||
![]() |
00bfe40e4d | ||
![]() |
cd459b1d49 | ||
![]() |
92a4793b3c | ||
![]() |
dc03a42537 | ||
![]() |
219da6bb68 | ||
![]() |
0499cd866e | ||
![]() |
13047f4135 | ||
![]() |
af69cab21d | ||
![]() |
d41a3fa1b4 | ||
![]() |
733be371af | ||
![]() |
576904bce6 | ||
![]() |
cf47794f09 | ||
![]() |
c06a9f8730 | ||
![]() |
2e90dff2c2 | ||
![]() |
90183a46d8 | ||
![]() |
b68eedba23 | ||
![]() |
d5b559393b | ||
![]() |
2cb434e53e | ||
![]() |
cd65491c30 | ||
![]() |
a172d96292 | ||
![]() |
55969016e9 | ||
![]() |
5c7495a194 | ||
![]() |
5ee6fc974e | ||
![]() |
63fc800057 |
9
AUTHORS
9
AUTHORS
@@ -115,3 +115,12 @@ Leslie P. Polzer
|
||||
Duncan Keall
|
||||
Alexander Mamay
|
||||
Devin J. Pohly
|
||||
Eduardo Ferro Aldama
|
||||
Jeff Buchbinder
|
||||
Amish Bhadeshia
|
||||
Joram Schrijver
|
||||
Will W.
|
||||
Mohammad Teimori Pabandi
|
||||
Roman Le Négrate
|
||||
Matthias Küch
|
||||
Julian Richen
|
||||
|
2
Makefile
2
Makefile
@@ -2,7 +2,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
|
||||
find -name "*.pyc" -delete
|
||||
find . -name "*.pyc" -delete
|
||||
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR ?= $(PREFIX)/bin
|
||||
|
234
README.md
234
README.md
@@ -5,6 +5,7 @@ youtube-dl - download videos from youtube.com or other video platforms
|
||||
- [OPTIONS](#options)
|
||||
- [CONFIGURATION](#configuration)
|
||||
- [OUTPUT TEMPLATE](#output-template)
|
||||
- [FORMAT SELECTION](#format-selection)
|
||||
- [VIDEO SELECTION](#video-selection)
|
||||
- [FAQ](#faq)
|
||||
- [DEVELOPER INSTRUCTIONS](#developer-instructions)
|
||||
@@ -45,21 +46,21 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
# OPTIONS
|
||||
-h, --help print this help text and exit
|
||||
--version print program version and exit
|
||||
-U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
|
||||
-i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist
|
||||
-h, --help Print this help text and exit
|
||||
--version Print program version and exit
|
||||
-U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)
|
||||
-i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist
|
||||
--abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs
|
||||
--dump-user-agent display the current browser identification
|
||||
--dump-user-agent Display the current browser identification
|
||||
--list-extractors List all supported extractors and the URLs they would handle
|
||||
--extractor-descriptions Output descriptions of all supported extractors
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||
--default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple".
|
||||
Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The
|
||||
default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.
|
||||
--ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration
|
||||
in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows)
|
||||
--flat-playlist Do not extract the videos of a playlist, only list them.
|
||||
--no-color Do not emit color codes in output.
|
||||
--no-color Do not emit color codes in output
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection
|
||||
@@ -71,70 +72,70 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
not present) is used for the actual downloading. (experimental)
|
||||
|
||||
## Video Selection:
|
||||
--playlist-start NUMBER playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
|
||||
--playlist-start NUMBER Playlist video to start at (default is 1)
|
||||
--playlist-end NUMBER Playlist video to end at (default is last)
|
||||
--playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8"
|
||||
if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will
|
||||
download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
|
||||
--match-title REGEX download only matching titles (regex or caseless sub-string)
|
||||
--reject-title REGEX skip download for matching titles (regex or caseless sub-string)
|
||||
--match-title REGEX Download only matching titles (regex or caseless sub-string)
|
||||
--reject-title REGEX Skip download for matching titles (regex or caseless sub-string)
|
||||
--max-downloads NUMBER Abort after downloading NUMBER files
|
||||
--min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
|
||||
--date DATE download only videos uploaded in this date
|
||||
--datebefore DATE download only videos uploaded on or before this date (i.e. inclusive)
|
||||
--dateafter DATE download only videos uploaded on or after this date (i.e. inclusive)
|
||||
--date DATE Download only videos uploaded in this date
|
||||
--datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive)
|
||||
--dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive)
|
||||
--min-views COUNT Do not download any videos with less than COUNT views
|
||||
--max-views COUNT Do not download any videos with more than COUNT views
|
||||
--match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present,
|
||||
--match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present,
|
||||
!key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against
|
||||
a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the
|
||||
operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike
|
||||
functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 &
|
||||
dislike_count <? 50 & description" .
|
||||
--no-playlist If the URL refers to a video and a playlist, download only the video.
|
||||
--yes-playlist If the URL refers to a video and a playlist, download the playlist.
|
||||
--age-limit YEARS download only videos suitable for the given age
|
||||
--no-playlist Download only the video, if the URL refers to a video and a playlist.
|
||||
--yes-playlist Download the playlist, if the URL refers to a video and a playlist.
|
||||
--age-limit YEARS Download only videos suitable for the given age
|
||||
--download-archive FILE Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
|
||||
--include-ads Download advertisements as well (experimental)
|
||||
|
||||
## Download Options:
|
||||
-r, --rate-limit LIMIT maximum download rate in bytes per second (e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES number of retries (default is 10), or "infinite".
|
||||
--buffer-size SIZE size of download buffer (e.g. 1024 or 16K) (default is 1024)
|
||||
--no-resize-buffer do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
|
||||
-r, --rate-limit LIMIT Maximum download rate in bytes per second (e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES Number of retries (default is 10), or "infinite".
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) (default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
|
||||
--playlist-reverse Download playlist videos in reverse order
|
||||
--xattr-set-filesize (experimental) set file xattribute ytdl.filesize with expected filesize
|
||||
--hls-prefer-native (experimental) Use the native HLS downloader instead of ffmpeg.
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected filesize (experimental)
|
||||
--hls-prefer-native Use the native HLS downloader instead of ffmpeg (experimental)
|
||||
--external-downloader COMMAND Use the specified external downloader. Currently supports aria2c,curl,wget
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader.
|
||||
--external-downloader-args ARGS Give these arguments to the external downloader
|
||||
|
||||
## Filesystem Options:
|
||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||
--id use only video ID in file name
|
||||
-o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
|
||||
-a, --batch-file FILE File containing URLs to download ('-' for stdin)
|
||||
--id Use only video ID in file name
|
||||
-o, --output TEMPLATE Output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader
|
||||
nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(format)s for
|
||||
the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like Youtube's itags: "137"),
|
||||
the format description (like "22 - 1280x720" or "HD"), %(format_id)s for the unique id of the format (like YouTube's itags: "137"),
|
||||
%(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id,
|
||||
%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in,
|
||||
%(playlist_index)s for the position in the playlist. %(height)s and %(width)s for the width and height of the video format.
|
||||
%(resolution)s for a textual description of the resolution of the video format. %% for a literal percent. Use - to output to stdout.
|
||||
Can also be used to download to a different directory, for example with -o '/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s' .
|
||||
--autonumber-size NUMBER Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
|
||||
--autonumber-size NUMBER Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given
|
||||
--restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
|
||||
-A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000
|
||||
-t, --title [deprecated] use title in file name (default)
|
||||
-l, --literal [deprecated] alias of --title
|
||||
-w, --no-overwrites do not overwrite files
|
||||
-c, --continue force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
|
||||
--no-continue do not resume partially downloaded files (restart from beginning)
|
||||
--no-part do not use .part files - write directly into output file
|
||||
--no-mtime do not use the Last-modified header to set the file modification time
|
||||
--write-description write video description to a .description file
|
||||
--write-info-json write video metadata to a .info.json file
|
||||
--write-annotations write video annotations to a .annotation file
|
||||
--load-info FILE json file containing the video information (created with the "--write-json" option)
|
||||
--cookies FILE file to read cookies from and dump cookie jar in
|
||||
-A, --auto-number [deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000
|
||||
-t, --title [deprecated] Use title in file name (default)
|
||||
-l, --literal [deprecated] Alias of --title
|
||||
-w, --no-overwrites Do not overwrite files
|
||||
-c, --continue Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
|
||||
--no-continue Do not resume partially downloaded files (restart from beginning)
|
||||
--no-part Do not use .part files - write directly into output file
|
||||
--no-mtime Do not use the Last-modified header to set the file modification time
|
||||
--write-description Write video description to a .description file
|
||||
--write-info-json Write video metadata to a .info.json file
|
||||
--write-annotations Write video annotations to a .annotation file
|
||||
--load-info FILE JSON file containing the video information (created with the "--write-info-json" option)
|
||||
--cookies FILE File to read cookies from and dump cookie jar in
|
||||
--cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl
|
||||
or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may
|
||||
change.
|
||||
@@ -142,96 +143,86 @@ which means you can modify it, redistribute it or use it however you like.
|
||||
--rm-cache-dir Delete all filesystem cache files
|
||||
|
||||
## Thumbnail images:
|
||||
--write-thumbnail write thumbnail image to disk
|
||||
--write-all-thumbnails write all thumbnail image formats to disk
|
||||
--write-thumbnail Write thumbnail image to disk
|
||||
--write-all-thumbnails Write all thumbnail image formats to disk
|
||||
--list-thumbnails Simulate and list all available thumbnail formats
|
||||
|
||||
## Verbosity / Simulation Options:
|
||||
-q, --quiet activates quiet mode
|
||||
-q, --quiet Activate quiet mode
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate do not download the video and do not write anything to disk
|
||||
--skip-download do not download the video
|
||||
-g, --get-url simulate, quiet but print URL
|
||||
-e, --get-title simulate, quiet but print title
|
||||
--get-id simulate, quiet but print id
|
||||
--get-thumbnail simulate, quiet but print thumbnail URL
|
||||
--get-description simulate, quiet but print video description
|
||||
--get-duration simulate, quiet but print video length
|
||||
--get-filename simulate, quiet but print output filename
|
||||
--get-format simulate, quiet but print output format
|
||||
-j, --dump-json simulate, quiet but print JSON information. See --output for a description of available keys.
|
||||
-J, --dump-single-json simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
|
||||
-s, --simulate Do not download the video and do not write anything to disk
|
||||
--skip-download Do not download the video
|
||||
-g, --get-url Simulate, quiet but print URL
|
||||
-e, --get-title Simulate, quiet but print title
|
||||
--get-id Simulate, quiet but print id
|
||||
--get-thumbnail Simulate, quiet but print thumbnail URL
|
||||
--get-description Simulate, quiet but print video description
|
||||
--get-duration Simulate, quiet but print video length
|
||||
--get-filename Simulate, quiet but print output filename
|
||||
--get-format Simulate, quiet but print output format
|
||||
-j, --dump-json Simulate, quiet but print JSON information. See --output for a description of available keys.
|
||||
-J, --dump-single-json Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist
|
||||
information in a single line.
|
||||
--print-json Be quiet and print the video information as JSON (video is still being downloaded).
|
||||
--newline output progress bar as new lines
|
||||
--no-progress do not print progress bar
|
||||
--console-title display progress in console titlebar
|
||||
-v, --verbose print various debugging information
|
||||
--dump-pages print downloaded pages to debug problems (very verbose)
|
||||
--newline Output progress bar as new lines
|
||||
--no-progress Do not print progress bar
|
||||
--console-title Display progress in console titlebar
|
||||
-v, --verbose Print various debugging information
|
||||
--dump-pages Print downloaded pages to debug problems (very verbose)
|
||||
--write-pages Write downloaded intermediary pages to files in the current directory to debug problems
|
||||
--print-traffic Display sent and read HTTP traffic
|
||||
-C, --call-home Contact the youtube-dl server for debugging.
|
||||
--no-call-home Do NOT contact the youtube-dl server for debugging.
|
||||
-C, --call-home Contact the youtube-dl server for debugging
|
||||
--no-call-home Do NOT contact the youtube-dl server for debugging
|
||||
|
||||
## Workarounds:
|
||||
--encoding ENCODING Force the specified encoding (experimental)
|
||||
--no-check-certificate Suppress HTTPS certificate validation.
|
||||
--no-check-certificate Suppress HTTPS certificate validation
|
||||
--prefer-insecure Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
|
||||
--user-agent UA specify a custom user agent
|
||||
--referer URL specify a custom referer, use if the video access is restricted to one domain
|
||||
--add-header FIELD:VALUE specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
|
||||
--user-agent UA Specify a custom user agent
|
||||
--referer URL Specify a custom referer, use if the video access is restricted to one domain
|
||||
--add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ':'. You can use this option multiple times
|
||||
--bidi-workaround Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
|
||||
--sleep-interval SECONDS Number of seconds to sleep before each download.
|
||||
|
||||
## Video Format Options:
|
||||
-f, --format FORMAT video format code, specify the order of preference using slashes, as in -f 22/17/18 . Instead of format codes, you can select by
|
||||
extension for the extensions aac, m4a, mp3, mp4, ogg, wav, webm. You can also use the special names "best", "bestvideo", "bestaudio",
|
||||
"worst". You can filter the video results by putting a condition in brackets, as in -f "best[height=720]" (or -f "[filesize>10M]").
|
||||
This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec,
|
||||
vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a
|
||||
question mark (?) after the operator. You can combine format filters, so -f "[height <=? 720][tbr>500]" selects up to 720p videos
|
||||
(or videos where the height is not known) with a bitrate of at least 500 KBit/s. By default, youtube-dl will pick the best quality.
|
||||
Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio
|
||||
of two formats into a single file using -f <video-format>+<audio-format> (requires ffmpeg or avconv), for example -f
|
||||
bestvideo+bestaudio.
|
||||
--all-formats download all available video formats
|
||||
--prefer-free-formats prefer free video formats unless a specific one is requested
|
||||
--max-quality FORMAT highest quality format to download
|
||||
-F, --list-formats list all available formats
|
||||
-f, --format FORMAT Video format code, see the "FORMAT SELECTION" for all the info
|
||||
--all-formats Download all available video formats
|
||||
--prefer-free-formats Prefer free video formats unless a specific one is requested
|
||||
-F, --list-formats List all available formats
|
||||
--youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos
|
||||
--merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no
|
||||
merge is required
|
||||
|
||||
## Subtitle Options:
|
||||
--write-sub write subtitle file
|
||||
--write-auto-sub write automatic subtitle file (youtube only)
|
||||
--all-subs downloads all the available subtitles of the video
|
||||
--list-subs lists all available subtitles for the video
|
||||
--sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best"
|
||||
--sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
|
||||
--write-sub Write subtitle file
|
||||
--write-auto-sub Write automatic subtitle file (YouTube only)
|
||||
--all-subs Download all the available subtitles of the video
|
||||
--list-subs List all available subtitles for the video
|
||||
--sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
|
||||
--sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt'
|
||||
|
||||
## Authentication Options:
|
||||
-u, --username USERNAME login with this account ID
|
||||
-p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR two-factor auth code
|
||||
-n, --netrc use .netrc authentication data
|
||||
--video-password PASSWORD video password (vimeo, smotri)
|
||||
-u, --username USERNAME Login with this account ID
|
||||
-p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively.
|
||||
-2, --twofactor TWOFACTOR Two-factor auth code
|
||||
-n, --netrc Use .netrc authentication data
|
||||
--video-password PASSWORD Video password (vimeo, smotri)
|
||||
|
||||
## Post-processing Options:
|
||||
-x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||
--audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K
|
||||
(default 5)
|
||||
-x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)
|
||||
--audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default
|
||||
--audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default
|
||||
5)
|
||||
--recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)
|
||||
-k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default
|
||||
--no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||
--embed-subs embed subtitles in the video (only for mp4 videos)
|
||||
--embed-thumbnail embed thumbnail in the audio as cover art
|
||||
--add-metadata write metadata to the video file
|
||||
--metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||
-k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default
|
||||
--no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default
|
||||
--embed-subs Embed subtitles in the video (only for mp4 videos)
|
||||
--embed-thumbnail Embed thumbnail in the audio as cover art
|
||||
--add-metadata Write metadata to the video file
|
||||
--metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed
|
||||
parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s -
|
||||
%(title)s" matches a title like "Coldplay - Paradise"
|
||||
--xattrs write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||
--xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards)
|
||||
--fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default;
|
||||
fix file if we can, warn otherwise)
|
||||
--prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default)
|
||||
@@ -271,6 +262,17 @@ $ youtube-dl --get-filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filena
|
||||
youtube-dl_test_video_.mp4 # A simple file name
|
||||
```
|
||||
|
||||
# FORMAT SELECTION
|
||||
|
||||
By default youtube-dl tries to download the best quality, but sometimes you may want to download other format.
|
||||
The simplest case is requesting a specific format, for example `-f 22`. You can get the list of available formats using `--list-formats`, you can also use a file extension (currently it supports aac, m4a, mp3, mp4, ogg, wav, webm) or the special names `best`, `bestvideo`, `bestaudio` and `worst`.
|
||||
|
||||
If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes, as in `-f 22/17/18`. You can also filter the video results by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, vcodec, container, and protocol and the comparisons =, != . Formats for which the value is not known are excluded unless you put a question mark (?) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. Use commas to download multiple formats, such as `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv), for example `-f bestvideo+bestaudio`.
|
||||
|
||||
Since the end of April 2015 and version 2015.04.26 youtube-dl uses `-f bestvideo+bestaudio/best` as default format selection (see #5447, #5456). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some dash formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file.
|
||||
|
||||
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
|
||||
|
||||
# VIDEO SELECTION
|
||||
|
||||
Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`, they accept dates in two formats:
|
||||
@@ -321,9 +323,9 @@ YouTube changed their playlist format in March 2014 and later on, so you'll need
|
||||
|
||||
If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging guys](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
|
||||
|
||||
### Do I always have to pass in `--max-quality FORMAT`, or `-citw`?
|
||||
### Do I always have to pass `-citw`?
|
||||
|
||||
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, `--max-quality` *limits* the video quality (so if you want the best quality, do NOT pass it in), and the only option out of `-citw` that is regularly useful is `-i`.
|
||||
By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
|
||||
|
||||
### Can you please put the -b option back?
|
||||
|
||||
@@ -355,6 +357,22 @@ YouTube has switched to a new video info format in July 2011 which is not suppor
|
||||
|
||||
YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
||||
### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` ###
|
||||
|
||||
That's actually the output from your shell. Since ampersand is one of the special shell characters it's interpreted by shell preventing you from passing the whole URL to youtube-dl. To disable your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell).
|
||||
|
||||
For example if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with following command:
|
||||
|
||||
```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'```
|
||||
|
||||
or
|
||||
|
||||
```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc```
|
||||
|
||||
For Windows you have to use the double quotes:
|
||||
|
||||
```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"```
|
||||
|
||||
### ExtractorError: Could not find JS function u'OF'
|
||||
|
||||
In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
|
||||
|
@@ -28,7 +28,7 @@ for test in get_testcases():
|
||||
if METHOD == 'EURISTIC':
|
||||
try:
|
||||
webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
|
||||
except:
|
||||
except Exception:
|
||||
print('\nFail: {0}'.format(test['name']))
|
||||
continue
|
||||
|
||||
|
42
devscripts/generate_aes_testdata.py
Normal file
42
devscripts/generate_aes_testdata.py
Normal file
@@ -0,0 +1,42 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import codecs
|
||||
import subprocess
|
||||
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import intlist_to_bytes
|
||||
from youtube_dl.aes import aes_encrypt, key_expansion
|
||||
|
||||
secret_msg = b'Secret message goes here'
|
||||
|
||||
|
||||
def hex_str(int_list):
|
||||
return codecs.encode(intlist_to_bytes(int_list), 'hex')
|
||||
|
||||
|
||||
def openssl_encode(algo, key, iv):
|
||||
cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
|
||||
prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
out, _ = prog.communicate(secret_msg)
|
||||
return out
|
||||
|
||||
iv = key = [0x20, 0x15] + 14 * [0]
|
||||
|
||||
r = openssl_encode('aes-128-cbc', key, iv)
|
||||
print('aes_cbc_decrypt')
|
||||
print(repr(r))
|
||||
|
||||
password = key
|
||||
new_key = aes_encrypt(password, key_expansion(password))
|
||||
r = openssl_encode('aes-128-ctr', new_key, iv)
|
||||
print('aes_decrypt_text 16')
|
||||
print(repr(r))
|
||||
|
||||
password = key + 16 * [0]
|
||||
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
|
||||
r = openssl_encode('aes-256-ctr', new_key, iv)
|
||||
print('aes_decrypt_text 32')
|
||||
print(repr(r))
|
@@ -2,6 +2,8 @@
|
||||
- **1tv**: Первый канал
|
||||
- **1up.com**
|
||||
- **220.ro**
|
||||
- **22tracks:genre**
|
||||
- **22tracks:track**
|
||||
- **24video**
|
||||
- **3sat**
|
||||
- **4tube**
|
||||
@@ -96,6 +98,7 @@
|
||||
- **CondeNast**: Condé Nast media group: Condé Nast, GQ, Glamour, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **Cracked**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSpan**: C-SPAN
|
||||
@@ -109,14 +112,17 @@
|
||||
- **DctpTv**
|
||||
- **DeezerPlaylist**
|
||||
- **defense.gouv.fr**
|
||||
- **DHM**: Filmarchiv - Deutsches Historisches Museum
|
||||
- **Discovery**
|
||||
- **divxstage**: DivxStage
|
||||
- **Dotsub**
|
||||
- **DouyuTV**
|
||||
- **DRBonanza**
|
||||
- **Dropbox**
|
||||
- **DrTuber**
|
||||
- **DRTV**
|
||||
- **Dump**
|
||||
- **Dumpert**
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **EaglePlatform**
|
||||
- **EbaumsWorld**
|
||||
@@ -161,12 +167,14 @@
|
||||
- **Gamekings**
|
||||
- **GameOne**
|
||||
- **gameone:playlist**
|
||||
- **Gamersyde**
|
||||
- **GameSpot**
|
||||
- **GameStar**
|
||||
- **Gametrailers**
|
||||
- **Gazeta**
|
||||
- **GDCVault**
|
||||
- **generic**: Generic downloader that works on some sites
|
||||
- **Gfycat**
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
@@ -174,7 +182,7 @@
|
||||
- **GodTube**
|
||||
- **GoldenMoustache**
|
||||
- **Golem**
|
||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in and fastvideo.in
|
||||
- **GorillaVid**: GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net
|
||||
- **Goshgay**
|
||||
- **Grooveshark**
|
||||
- **Groupon**
|
||||
@@ -230,6 +238,7 @@
|
||||
- **Letv**
|
||||
- **LetvPlaylist**
|
||||
- **LetvTv**
|
||||
- **Libsyn**
|
||||
- **lifenews**: LIFE | NEWS
|
||||
- **LiveLeak**
|
||||
- **livestream**
|
||||
@@ -244,11 +253,13 @@
|
||||
- **Malemotion**
|
||||
- **MDR**
|
||||
- **media.ccc.de**
|
||||
- **MegaVideoz**
|
||||
- **metacafe**
|
||||
- **Metacritic**
|
||||
- **Mgoon**
|
||||
- **Minhateca**
|
||||
- **MinistryGrid**
|
||||
- **miomio.tv**
|
||||
- **mitele.es**
|
||||
- **mixcloud**
|
||||
- **MLB**
|
||||
@@ -282,6 +293,8 @@
|
||||
- **NBA**
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **NBCSports**
|
||||
- **NBCSportsVPlayer**
|
||||
- **ndr**: NDR.de - Mediathek
|
||||
- **NDTV**
|
||||
- **NerdCubedFeed**
|
||||
@@ -309,6 +322,7 @@
|
||||
- **npo.nl:radio**
|
||||
- **npo.nl:radio:fragment**
|
||||
- **NRK**
|
||||
- **NRKPlaylist**
|
||||
- **NRKTV**
|
||||
- **ntv.ru**
|
||||
- **Nuvid**
|
||||
@@ -326,6 +340,7 @@
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Patreon**
|
||||
- **PBS**
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **Phoenix**
|
||||
- **Photobucket**
|
||||
- **Pladform**
|
||||
@@ -341,16 +356,22 @@
|
||||
- **PornHub**
|
||||
- **PornHubPlaylist**
|
||||
- **Pornotube**
|
||||
- **PornoVoisines**
|
||||
- **PornoXO**
|
||||
- **PrimeShareTV**
|
||||
- **PromptFile**
|
||||
- **prosiebensat1**: ProSiebenSat.1 Digital
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **QQMusic**
|
||||
- **QQMusicAlbum**
|
||||
- **QQMusicSinger**
|
||||
- **QuickVid**
|
||||
- **R7**
|
||||
- **radio.de**
|
||||
- **radiobremen**
|
||||
- **radiofrance**
|
||||
- **RadioJavan**
|
||||
- **Rai**
|
||||
- **RBMARadio**
|
||||
- **RedTube**
|
||||
@@ -367,6 +388,7 @@
|
||||
- **RTP**
|
||||
- **RTS**: RTS.ch
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **RUHD**
|
||||
- **rutube**: Rutube videos
|
||||
@@ -375,6 +397,8 @@
|
||||
- **rutube:movie**: Rutube movies
|
||||
- **rutube:person**: Rutube person videos
|
||||
- **RUTV**: RUTV.RU
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **Sandia**: Sandia National Laboratories
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
@@ -384,6 +408,7 @@
|
||||
- **Screencast**
|
||||
- **ScreencastOMatic**
|
||||
- **ScreenwaveMedia**
|
||||
- **SenateISVP**
|
||||
- **ServingSys**
|
||||
- **Sexu**
|
||||
- **SexyKarma**: Sexy Karma and Watch Indian Porn
|
||||
@@ -406,8 +431,10 @@
|
||||
- **soundgasm**
|
||||
- **soundgasm:profile**
|
||||
- **southpark.cc.com**
|
||||
- **southpark.cc.com:español**
|
||||
- **southpark.de**
|
||||
- **Space**
|
||||
- **SpankBang**
|
||||
- **Spankwire**
|
||||
- **Spiegel**
|
||||
- **Spiegel:Article**: Articles on spiegel.de
|
||||
@@ -416,6 +443,7 @@
|
||||
- **Sport5**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **Srf**
|
||||
- **SRMediathek**: Saarländischer Rundfunk
|
||||
- **SSA**
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
@@ -438,7 +466,7 @@
|
||||
- **TeamFour**
|
||||
- **TechTalks**
|
||||
- **techtv.mit.edu**
|
||||
- **TED**
|
||||
- **ted**
|
||||
- **tegenlicht.vpro.nl**
|
||||
- **TeleBruxelles**
|
||||
- **telecinco.es**
|
||||
@@ -487,13 +515,17 @@
|
||||
- **Ubu**
|
||||
- **udemy**
|
||||
- **udemy:course**
|
||||
- **UDNEmbed**
|
||||
- **Ultimedia**
|
||||
- **Unistra**
|
||||
- **Urort**: NRK P3 Urørt
|
||||
- **ustream**
|
||||
- **ustream:channel**
|
||||
- **Varzesh3**
|
||||
- **Vbox7**
|
||||
- **VeeHD**
|
||||
- **Veoh**
|
||||
- **Vessel**
|
||||
- **Vesti**: Вести.Ru
|
||||
- **Vevo**
|
||||
- **VGTV**
|
||||
@@ -524,7 +556,7 @@
|
||||
- **vimeo:review**: Review pages on vimeo
|
||||
- **vimeo:user**
|
||||
- **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
|
||||
- **Vimple**: Vimple.ru
|
||||
- **Vimple**: Vimple - one-click video hosting
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **vk.com**
|
||||
@@ -582,7 +614,7 @@
|
||||
- **youtube:show**: YouTube.com (multi-season) shows
|
||||
- **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **Zapiks**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
|
@@ -150,7 +150,7 @@ def expect_info_dict(self, got_dict, expected_dict):
|
||||
'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
|
||||
|
||||
# Check for the presence of mandatory fields
|
||||
if got_dict.get('_type') != 'playlist':
|
||||
if got_dict.get('_type') not in ('playlist', 'multi_video'):
|
||||
for key in ('id', 'url', 'title', 'ext'):
|
||||
self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
|
||||
# Check for mandatory fields that are automatically set by YoutubeDL
|
||||
|
@@ -7,8 +7,7 @@
|
||||
"forcethumbnail": false,
|
||||
"forcetitle": false,
|
||||
"forceurl": false,
|
||||
"format": null,
|
||||
"format_limit": null,
|
||||
"format": "best",
|
||||
"ignoreerrors": false,
|
||||
"listformats": null,
|
||||
"logtostderr": false,
|
||||
|
@@ -14,6 +14,7 @@ from test.helper import FakeYDL, assertRegexpMatches
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.extractor import YoutubeIE
|
||||
from youtube_dl.postprocessor.common import PostProcessor
|
||||
from youtube_dl.utils import match_filter_func
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
@@ -100,39 +101,6 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['ext'], 'flv')
|
||||
|
||||
def test_format_limit(self):
|
||||
formats = [
|
||||
{'format_id': 'meh', 'url': 'http://example.com/meh', 'preference': 1},
|
||||
{'format_id': 'good', 'url': 'http://example.com/good', 'preference': 2},
|
||||
{'format_id': 'great', 'url': 'http://example.com/great', 'preference': 3},
|
||||
{'format_id': 'excellent', 'url': 'http://example.com/exc', 'preference': 4},
|
||||
]
|
||||
info_dict = _make_result(formats)
|
||||
|
||||
ydl = YDL()
|
||||
ydl.process_ie_result(info_dict)
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'excellent')
|
||||
|
||||
ydl = YDL({'format_limit': 'good'})
|
||||
assert ydl.params['format_limit'] == 'good'
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'good')
|
||||
|
||||
ydl = YDL({'format_limit': 'great', 'format': 'all'})
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'meh')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[1]['format_id'], 'good')
|
||||
self.assertEqual(ydl.downloaded_info_dicts[2]['format_id'], 'great')
|
||||
self.assertTrue('3' in ydl.msgs[0])
|
||||
|
||||
ydl = YDL()
|
||||
ydl.params['format_limit'] = 'excellent'
|
||||
ydl.process_ie_result(info_dict.copy())
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'excellent')
|
||||
|
||||
def test_format_selection(self):
|
||||
formats = [
|
||||
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
|
||||
@@ -339,6 +307,8 @@ class TestFormatSelection(unittest.TestCase):
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'G')
|
||||
|
||||
|
||||
class TestYoutubeDL(unittest.TestCase):
|
||||
def test_subtitles(self):
|
||||
def s_formats(lang, autocaption=False):
|
||||
return [{
|
||||
@@ -440,27 +410,103 @@ class TestFormatSelection(unittest.TestCase):
|
||||
def run(self, info):
|
||||
with open(audiofile, 'wt') as f:
|
||||
f.write('EXAMPLE')
|
||||
info['filepath']
|
||||
return False, info
|
||||
return [info['filepath']], info
|
||||
|
||||
def run_pp(params):
|
||||
def run_pp(params, PP):
|
||||
with open(filename, 'wt') as f:
|
||||
f.write('EXAMPLE')
|
||||
ydl = YoutubeDL(params)
|
||||
ydl.add_post_processor(SimplePP())
|
||||
ydl.add_post_processor(PP())
|
||||
ydl.post_process(filename, {'filepath': filename})
|
||||
|
||||
run_pp({'keepvideo': True})
|
||||
run_pp({'keepvideo': True}, SimplePP)
|
||||
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
|
||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||
os.unlink(filename)
|
||||
os.unlink(audiofile)
|
||||
|
||||
run_pp({'keepvideo': False})
|
||||
run_pp({'keepvideo': False}, SimplePP)
|
||||
self.assertFalse(os.path.exists(filename), '%s exists' % filename)
|
||||
self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
|
||||
os.unlink(audiofile)
|
||||
|
||||
class ModifierPP(PostProcessor):
|
||||
def run(self, info):
|
||||
with open(info['filepath'], 'wt') as f:
|
||||
f.write('MODIFIED')
|
||||
return [], info
|
||||
|
||||
run_pp({'keepvideo': False}, ModifierPP)
|
||||
self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
|
||||
os.unlink(filename)
|
||||
|
||||
def test_match_filter(self):
|
||||
class FilterYDL(YDL):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(FilterYDL, self).__init__(*args, **kwargs)
|
||||
self.params['simulate'] = True
|
||||
|
||||
def process_info(self, info_dict):
|
||||
super(YDL, self).process_info(info_dict)
|
||||
|
||||
def _match_entry(self, info_dict, incomplete):
|
||||
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
|
||||
if res is None:
|
||||
self.downloaded_info_dicts.append(info_dict)
|
||||
return res
|
||||
|
||||
first = {
|
||||
'id': '1',
|
||||
'url': TEST_URL,
|
||||
'title': 'one',
|
||||
'extractor': 'TEST',
|
||||
'duration': 30,
|
||||
'filesize': 10 * 1024,
|
||||
}
|
||||
second = {
|
||||
'id': '2',
|
||||
'url': TEST_URL,
|
||||
'title': 'two',
|
||||
'extractor': 'TEST',
|
||||
'duration': 10,
|
||||
'description': 'foo',
|
||||
'filesize': 5 * 1024,
|
||||
}
|
||||
videos = [first, second]
|
||||
|
||||
def get_videos(filter_=None):
|
||||
ydl = FilterYDL({'match_filter': filter_})
|
||||
for v in videos:
|
||||
ydl.process_ie_result(v, download=True)
|
||||
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||
|
||||
res = get_videos()
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
def f(v):
|
||||
if v['id'] == '1':
|
||||
return None
|
||||
else:
|
||||
return 'Video id is not 1'
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
f = match_filter_func('duration < 30')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('description = foo')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['2'])
|
||||
|
||||
f = match_filter_func('description =? foo')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1', '2'])
|
||||
|
||||
f = match_filter_func('filesize > 5KiB')
|
||||
res = get_videos(f)
|
||||
self.assertEqual(res, ['1'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
55
test/test_aes.py
Normal file
55
test/test_aes.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_decrypt_text
|
||||
from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
|
||||
import base64
|
||||
|
||||
# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
|
||||
|
||||
|
||||
class TestAES(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.key = self.iv = [0x20, 0x15] + 14 * [0]
|
||||
self.secret_msg = b'Secret message goes here'
|
||||
|
||||
def test_encrypt(self):
|
||||
msg = b'message'
|
||||
key = list(range(16))
|
||||
encrypted = aes_encrypt(bytes_to_intlist(msg), key)
|
||||
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
|
||||
self.assertEqual(decrypted, msg)
|
||||
|
||||
def test_cbc_decrypt(self):
|
||||
data = bytes_to_intlist(
|
||||
b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
|
||||
)
|
||||
decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_decrypt_text(self):
|
||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8]) +
|
||||
b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
|
||||
)
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
password = intlist_to_bytes(self.key).decode('utf-8')
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8]) +
|
||||
b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
|
||||
)
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -59,7 +59,7 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||
|
||||
def test_youtube_feeds(self):
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
|
||||
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater'])
|
||||
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
||||
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
||||
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
||||
|
@@ -153,7 +153,7 @@ def generator(test_case):
|
||||
break
|
||||
|
||||
if is_playlist:
|
||||
self.assertEqual(res_dict['_type'], 'playlist')
|
||||
self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
|
||||
self.assertTrue('entries' in res_dict)
|
||||
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||
|
||||
|
@@ -1,4 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import unittest
|
||||
@@ -6,6 +8,9 @@ import unittest
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl.utils import encodeArgument
|
||||
|
||||
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
@@ -27,5 +32,12 @@ class TestExecution(unittest.TestCase):
|
||||
def test_main_exec(self):
|
||||
subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
|
||||
|
||||
def test_cmdline_umlauts(self):
|
||||
p = subprocess.Popen(
|
||||
[sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
|
||||
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
|
||||
_, stderr = p.communicate()
|
||||
self.assertFalse(stderr)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -8,7 +8,7 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
@@ -68,5 +68,52 @@ class TestHTTP(unittest.TestCase):
|
||||
r = ydl.extract_info('https://localhost:%d/video.html' % self.port)
|
||||
self.assertEqual(r['url'], 'https://localhost:%d/vid.mp4' % self.port)
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
proxy_name = name
|
||||
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8'))
|
||||
return HTTPTestRequestHandler
|
||||
|
||||
|
||||
class TestProxy(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('normal'))
|
||||
self.port = self.proxy.socket.getsockname()[1]
|
||||
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
|
||||
self.proxy_thread.daemon = True
|
||||
self.proxy_thread.start()
|
||||
|
||||
self.cn_proxy = compat_http_server.HTTPServer(
|
||||
('localhost', 0), _build_proxy_handler('cn'))
|
||||
self.cn_port = self.cn_proxy.socket.getsockname()[1]
|
||||
self.cn_proxy_thread = threading.Thread(target=self.cn_proxy.serve_forever)
|
||||
self.cn_proxy_thread.daemon = True
|
||||
self.cn_proxy_thread.start()
|
||||
|
||||
def test_proxy(self):
|
||||
cn_proxy = 'localhost:{0}'.format(self.cn_port)
|
||||
ydl = YoutubeDL({
|
||||
'proxy': 'localhost:{0}'.format(self.port),
|
||||
'cn_verification_proxy': cn_proxy,
|
||||
})
|
||||
url = 'http://foo.com/bar'
|
||||
response = ydl.urlopen(url).read().decode('utf-8')
|
||||
self.assertEqual(response, 'normal: {0}'.format(url))
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Ytdl-request-proxy', cn_proxy)
|
||||
response = ydl.urlopen(req).read().decode('utf-8')
|
||||
self.assertEqual(response, 'cn: {0}'.format(url))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -24,6 +24,7 @@ from youtube_dl.utils import (
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
InAdvancePagedList,
|
||||
@@ -39,6 +40,7 @@ from youtube_dl.utils import (
|
||||
read_batch_urls,
|
||||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url_path_consecutive_slashes,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_to_int,
|
||||
@@ -53,8 +55,11 @@ from youtube_dl.utils import (
|
||||
urlencode_postdata,
|
||||
version_tuple,
|
||||
xpath_with_ns,
|
||||
xpath_text,
|
||||
render_table,
|
||||
match_str,
|
||||
parse_dfxp_time_expr,
|
||||
dfxp2srt,
|
||||
)
|
||||
|
||||
|
||||
@@ -168,6 +173,26 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||
|
||||
def test_sanitize_url_path_consecutive_slashes(self):
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname//'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
|
||||
'http://hostname/foo/bar/filename.html')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/'),
|
||||
'http://hostname/')
|
||||
self.assertEqual(
|
||||
sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
|
||||
'http://hostname/abc/')
|
||||
|
||||
def test_ordered_set(self):
|
||||
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
|
||||
self.assertEqual(orderedSet([]), [])
|
||||
@@ -177,6 +202,8 @@ class TestUtil(unittest.TestCase):
|
||||
|
||||
def test_unescape_html(self):
|
||||
self.assertEqual(unescapeHTML('%20;'), '%20;')
|
||||
self.assertEqual(unescapeHTML('/'), '/')
|
||||
self.assertEqual(unescapeHTML('/'), '/')
|
||||
self.assertEqual(
|
||||
unescapeHTML('é'), 'é')
|
||||
|
||||
@@ -202,6 +229,7 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
|
||||
'20150202')
|
||||
self.assertEqual(unified_strdate('25-09-2014'), '20140925')
|
||||
|
||||
def test_find_xpath_attr(self):
|
||||
testxml = '''<root>
|
||||
@@ -229,6 +257,17 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(find('media:song/media:author').text, 'The Author')
|
||||
self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')
|
||||
|
||||
def test_xpath_text(self):
|
||||
testxml = '''<root>
|
||||
<div>
|
||||
<p>Foo</p>
|
||||
</div>
|
||||
</root>'''
|
||||
doc = xml.etree.ElementTree.fromstring(testxml)
|
||||
self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
|
||||
self.assertTrue(xpath_text(doc, 'div/bar') is None)
|
||||
self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)
|
||||
|
||||
def test_smuggle_url(self):
|
||||
data = {"ö": "ö", "abc": [3]}
|
||||
url = 'https://foo.bar/baz?x=y#a'
|
||||
@@ -434,6 +473,12 @@ class TestUtil(unittest.TestCase):
|
||||
self.assertEqual(d['x'], 1)
|
||||
self.assertEqual(d['y'], 'a')
|
||||
|
||||
on = js_to_json('["abc", "def",]')
|
||||
self.assertEqual(json.loads(on), ['abc', 'def'])
|
||||
|
||||
on = js_to_json('{"abc": "def",}')
|
||||
self.assertEqual(json.loads(on), {'abc': 'def'})
|
||||
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
@@ -538,6 +583,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
|
||||
'like_count > 100 & dislike_count <? 50 & description',
|
||||
{'like_count': 190, 'dislike_count': 10}))
|
||||
|
||||
def test_parse_dfxp_time_expr(self):
|
||||
self.assertEqual(parse_dfxp_time_expr(None), 0.0)
|
||||
self.assertEqual(parse_dfxp_time_expr(''), 0.0)
|
||||
self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1)
|
||||
self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1)
|
||||
self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0)
|
||||
self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1)
|
||||
|
||||
def test_dfxp2srt(self):
|
||||
dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
|
||||
<tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
|
||||
<body>
|
||||
<div xml:lang="en">
|
||||
<p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
|
||||
<p begin="1" end="2">第二行<br/>♪♪</p>
|
||||
<p begin="2" end="3"><span>Third<br/>Line</span></p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
srt_data = '''1
|
||||
00:00:00,000 --> 00:00:01,000
|
||||
The following line contains Chinese characters and special symbols
|
||||
|
||||
2
|
||||
00:00:01,000 --> 00:00:02,000
|
||||
第二行
|
||||
♪♪
|
||||
|
||||
3
|
||||
00:00:02,000 --> 00:00:03,000
|
||||
Third
|
||||
Line
|
||||
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data), srt_data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@@ -64,7 +64,6 @@ from .utils import (
|
||||
sanitize_path,
|
||||
std_headers,
|
||||
subtitles_filename,
|
||||
takewhile_inclusive,
|
||||
UnavailableVideoError,
|
||||
url_basename,
|
||||
version_tuple,
|
||||
@@ -135,7 +134,6 @@ class YoutubeDL(object):
|
||||
(or video) as a single JSON line.
|
||||
simulate: Do not download the video files.
|
||||
format: Video format code. See options.py for more information.
|
||||
format_limit: Highest quality format to try.
|
||||
outtmpl: Template for output names.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
ignoreerrors: Do not stop on download errors.
|
||||
@@ -328,9 +326,6 @@ class YoutubeDL(object):
|
||||
'Parameter outtmpl is bytes, but should be a unicode string. '
|
||||
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
|
||||
|
||||
if '%(stitle)s' in self.params.get('outtmpl', ''):
|
||||
self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
if auto_init:
|
||||
@@ -922,6 +917,11 @@ class YoutubeDL(object):
|
||||
if format_spec == 'best' or format_spec is None:
|
||||
return available_formats[-1]
|
||||
elif format_spec == 'worst':
|
||||
audiovideo_formats = [
|
||||
f for f in available_formats
|
||||
if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
|
||||
if audiovideo_formats:
|
||||
return audiovideo_formats[0]
|
||||
return available_formats[0]
|
||||
elif format_spec == 'bestaudio':
|
||||
audio_formats = [
|
||||
@@ -1066,12 +1066,6 @@ class YoutubeDL(object):
|
||||
full_format_info.update(format)
|
||||
format['http_headers'] = self._calc_headers(full_format_info)
|
||||
|
||||
format_limit = self.params.get('format_limit', None)
|
||||
if format_limit:
|
||||
formats = list(takewhile_inclusive(
|
||||
lambda f: f['format_id'] != format_limit, formats
|
||||
))
|
||||
|
||||
# TODO Central sorting goes here
|
||||
|
||||
if formats[0] is not info_dict:
|
||||
@@ -1089,7 +1083,11 @@ class YoutubeDL(object):
|
||||
|
||||
req_format = self.params.get('format')
|
||||
if req_format is None:
|
||||
req_format = 'best'
|
||||
req_format_list = []
|
||||
if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available:
|
||||
req_format_list.append('bestvideo+bestaudio')
|
||||
req_format_list.append('best')
|
||||
req_format = '/'.join(req_format_list)
|
||||
formats_to_download = []
|
||||
if req_format == 'all':
|
||||
formats_to_download = formats
|
||||
@@ -1218,9 +1216,6 @@ class YoutubeDL(object):
|
||||
if len(info_dict['title']) > 200:
|
||||
info_dict['title'] = info_dict['title'][:197] + '...'
|
||||
|
||||
# Keep for backwards compatibility
|
||||
info_dict['stitle'] = info_dict['title']
|
||||
|
||||
if 'format' not in info_dict:
|
||||
info_dict['format'] = info_dict['ext']
|
||||
|
||||
@@ -1362,7 +1357,7 @@ class YoutubeDL(object):
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
downloaded = []
|
||||
success = True
|
||||
merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
|
||||
merger = FFmpegMergerPP(self)
|
||||
if not merger.available:
|
||||
postprocessors = []
|
||||
self.report_warning('You have requested multiple '
|
||||
@@ -1370,16 +1365,42 @@ class YoutubeDL(object):
|
||||
' The formats won\'t be merged')
|
||||
else:
|
||||
postprocessors = [merger]
|
||||
for f in info_dict['requested_formats']:
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(f)
|
||||
fname = self.prepare_filename(new_info)
|
||||
fname = prepend_extension(fname, 'f%s' % f['format_id'])
|
||||
downloaded.append(fname)
|
||||
partial_success = dl(fname, new_info)
|
||||
success = success and partial_success
|
||||
info_dict['__postprocessors'] = postprocessors
|
||||
info_dict['__files_to_merge'] = downloaded
|
||||
|
||||
def compatible_formats(formats):
|
||||
video, audio = formats
|
||||
# Check extension
|
||||
video_ext, audio_ext = audio.get('ext'), video.get('ext')
|
||||
if video_ext and audio_ext:
|
||||
COMPATIBLE_EXTS = (
|
||||
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
|
||||
('webm')
|
||||
)
|
||||
for exts in COMPATIBLE_EXTS:
|
||||
if video_ext in exts and audio_ext in exts:
|
||||
return True
|
||||
# TODO: Check acodec/vcodec
|
||||
return False
|
||||
|
||||
requested_formats = info_dict['requested_formats']
|
||||
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
|
||||
filename = os.path.splitext(filename)[0] + '.mkv'
|
||||
self.report_warning('You have requested formats uncompatible for merge. '
|
||||
'The formats will be merged into mkv')
|
||||
if os.path.exists(encodeFilename(filename)):
|
||||
self.to_screen(
|
||||
'[download] %s has already been downloaded and '
|
||||
'merged' % filename)
|
||||
else:
|
||||
for f in requested_formats:
|
||||
new_info = dict(info_dict)
|
||||
new_info.update(f)
|
||||
fname = self.prepare_filename(new_info)
|
||||
fname = prepend_extension(fname, 'f%s' % f['format_id'])
|
||||
downloaded.append(fname)
|
||||
partial_success = dl(fname, new_info)
|
||||
success = success and partial_success
|
||||
info_dict['__postprocessors'] = postprocessors
|
||||
info_dict['__files_to_merge'] = downloaded
|
||||
else:
|
||||
# Just a single file
|
||||
success = dl(filename, info_dict)
|
||||
@@ -1487,24 +1508,17 @@ class YoutubeDL(object):
|
||||
pps_chain.extend(ie_info['__postprocessors'])
|
||||
pps_chain.extend(self._pps)
|
||||
for pp in pps_chain:
|
||||
keep_video = None
|
||||
old_filename = info['filepath']
|
||||
try:
|
||||
keep_video_wish, info = pp.run(info)
|
||||
if keep_video_wish is not None:
|
||||
if keep_video_wish:
|
||||
keep_video = keep_video_wish
|
||||
elif keep_video is None:
|
||||
# No clear decision yet, let IE decide
|
||||
keep_video = keep_video_wish
|
||||
files_to_delete, info = pp.run(info)
|
||||
except PostProcessingError as e:
|
||||
self.report_error(e.msg)
|
||||
if keep_video is False and not self.params.get('keepvideo', False):
|
||||
try:
|
||||
if files_to_delete and not self.params.get('keepvideo', False):
|
||||
for old_filename in files_to_delete:
|
||||
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
|
||||
os.remove(encodeFilename(old_filename))
|
||||
except (IOError, OSError):
|
||||
self.report_warning('Unable to remove downloaded video file')
|
||||
try:
|
||||
os.remove(encodeFilename(old_filename))
|
||||
except (IOError, OSError):
|
||||
self.report_warning('Unable to remove downloaded original file')
|
||||
|
||||
def _make_archive_id(self, info_dict):
|
||||
# Future-proof against any change in case
|
||||
@@ -1707,10 +1721,10 @@ class YoutubeDL(object):
|
||||
out = out.decode().strip()
|
||||
if re.match('[0-9a-f]+', out):
|
||||
self._write_string('[debug] Git HEAD: ' + out + '\n')
|
||||
except:
|
||||
except Exception:
|
||||
try:
|
||||
sys.exc_clear()
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
self._write_string('[debug] Python version %s - %s\n' % (
|
||||
platform.python_version(), platform_name()))
|
||||
|
@@ -189,10 +189,6 @@ def _real_main(argv=None):
|
||||
if opts.allsubtitles and not opts.writeautomaticsub:
|
||||
opts.writesubtitles = True
|
||||
|
||||
if sys.version_info < (3,):
|
||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||
if opts.outtmpl is not None:
|
||||
opts.outtmpl = opts.outtmpl.decode(preferredencoding())
|
||||
outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
|
||||
(opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
|
||||
(opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
|
||||
@@ -244,9 +240,7 @@ def _real_main(argv=None):
|
||||
if opts.xattrs:
|
||||
postprocessors.append({'key': 'XAttrMetadata'})
|
||||
if opts.embedthumbnail:
|
||||
if not opts.addmetadata:
|
||||
postprocessors.append({'key': 'FFmpegAudioFix'})
|
||||
postprocessors.append({'key': 'AtomicParsley'})
|
||||
postprocessors.append({'key': 'EmbedThumbnail'})
|
||||
# Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
|
||||
# So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
|
||||
if opts.exec_cmd:
|
||||
@@ -289,7 +283,6 @@ def _real_main(argv=None):
|
||||
'simulate': opts.simulate or any_getting,
|
||||
'skip_download': opts.skip_download,
|
||||
'format': opts.format,
|
||||
'format_limit': opts.format_limit,
|
||||
'listformats': opts.listformats,
|
||||
'outtmpl': outtmpl,
|
||||
'autonumber_size': opts.autonumber_size,
|
||||
|
@@ -389,7 +389,7 @@ else:
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = sp.communicate()
|
||||
lines, columns = map(int, out.split())
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
return _terminal_size(columns, lines)
|
||||
|
||||
|
@@ -6,7 +6,7 @@ from .f4m import F4mFD
|
||||
from .hls import HlsFD
|
||||
from .hls import NativeHlsFD
|
||||
from .http import HttpFD
|
||||
from .mplayer import MplayerFD
|
||||
from .rtsp import RtspFD
|
||||
from .rtmp import RtmpFD
|
||||
|
||||
from ..utils import (
|
||||
@@ -17,8 +17,8 @@ PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
'm3u8_native': NativeHlsFD,
|
||||
'm3u8': HlsFD,
|
||||
'mms': MplayerFD,
|
||||
'rtsp': MplayerFD,
|
||||
'mms': RtspFD,
|
||||
'rtsp': RtspFD,
|
||||
'f4m': F4mFD,
|
||||
}
|
||||
|
||||
|
@@ -8,6 +8,7 @@ import time
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
decodeArgument,
|
||||
format_bytes,
|
||||
timeconvert,
|
||||
)
|
||||
@@ -204,7 +205,7 @@ class FileDownloader(object):
|
||||
return
|
||||
try:
|
||||
os.utime(filename, (time.time(), filetime))
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
return filetime
|
||||
|
||||
@@ -318,7 +319,7 @@ class FileDownloader(object):
|
||||
)
|
||||
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', False) and
|
||||
self.params.get('continuedl', True) and
|
||||
os.path.isfile(encodeFilename(filename)) and
|
||||
not self.params.get('nopart', False)
|
||||
)
|
||||
@@ -353,19 +354,15 @@ class FileDownloader(object):
|
||||
# this interface
|
||||
self._progress_hooks.append(ph)
|
||||
|
||||
def _debug_cmd(self, args, subprocess_encoding, exe=None):
|
||||
def _debug_cmd(self, args, exe=None):
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
|
||||
if exe is None:
|
||||
exe = os.path.basename(args[0])
|
||||
str_args = [decodeArgument(a) for a in args]
|
||||
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
|
||||
if subprocess_encoding:
|
||||
str_args = [
|
||||
a.decode(subprocess_encoding) if isinstance(a, bytes) else a
|
||||
for a in args]
|
||||
else:
|
||||
str_args = args
|
||||
try:
|
||||
import pipes
|
||||
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
|
||||
|
@@ -2,11 +2,11 @@ from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
encodeArgument,
|
||||
)
|
||||
|
||||
|
||||
@@ -60,17 +60,9 @@ class ExternalFD(FileDownloader):
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
cmd = self._make_cmd(tmpfilename, info_dict)
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
self._debug_cmd(cmd, subprocess_encoding)
|
||||
self._debug_cmd(cmd)
|
||||
|
||||
p = subprocess.Popen(
|
||||
cmd, stderr=subprocess.PIPE)
|
||||
|
@@ -389,6 +389,8 @@ class F4mFD(FileDownloader):
|
||||
url = base_url + name
|
||||
if akamai_pv:
|
||||
url += '?' + akamai_pv.strip(';')
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
url += info_dict.get('extra_param_to_segment_url')
|
||||
frag_filename = '%s-%s' % (tmpfilename, name)
|
||||
try:
|
||||
success = http_dl.download(frag_filename, {'url': url})
|
||||
|
@@ -49,7 +49,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
open_mode = 'wb'
|
||||
if resume_len != 0:
|
||||
if self.params.get('continuedl', False):
|
||||
if self.params.get('continuedl', True):
|
||||
self.report_resuming_byte(resume_len)
|
||||
request.add_header('Range', 'bytes=%d-' % resume_len)
|
||||
open_mode = 'ab'
|
||||
|
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from .common import FileDownloader
|
||||
@@ -11,6 +10,7 @@ from ..compat import compat_str
|
||||
from ..utils import (
|
||||
check_executable,
|
||||
encodeFilename,
|
||||
encodeArgument,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
@@ -105,7 +105,7 @@ class RtmpFD(FileDownloader):
|
||||
protocol = info_dict.get('rtmp_protocol', None)
|
||||
real_time = info_dict.get('rtmp_real_time', False)
|
||||
no_resume = info_dict.get('no_resume', False)
|
||||
continue_dl = info_dict.get('continuedl', False)
|
||||
continue_dl = info_dict.get('continuedl', True)
|
||||
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
@@ -121,7 +121,7 @@ class RtmpFD(FileDownloader):
|
||||
# possible. This is part of rtmpdump's normal usage, AFAIK.
|
||||
basic_args = [
|
||||
'rtmpdump', '--verbose', '-r', url,
|
||||
'-o', encodeFilename(tmpfilename, True)]
|
||||
'-o', tmpfilename]
|
||||
if player_url is not None:
|
||||
basic_args += ['--swfVfy', player_url]
|
||||
if page_url is not None:
|
||||
@@ -154,16 +154,9 @@ class RtmpFD(FileDownloader):
|
||||
if not live and continue_dl:
|
||||
args += ['--skip', '1']
|
||||
|
||||
if sys.platform == 'win32' and sys.version_info < (3, 0):
|
||||
# Windows subprocess module does not actually support Unicode
|
||||
# on Python 2.x
|
||||
# See http://stackoverflow.com/a/9951851/35070
|
||||
subprocess_encoding = sys.getfilesystemencoding()
|
||||
args = [a.encode(subprocess_encoding, 'ignore') for a in args]
|
||||
else:
|
||||
subprocess_encoding = None
|
||||
args = [encodeArgument(a) for a in args]
|
||||
|
||||
self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
|
||||
self._debug_cmd(args, exe='rtmpdump')
|
||||
|
||||
RD_SUCCESS = 0
|
||||
RD_FAILED = 1
|
||||
@@ -180,7 +173,11 @@ class RtmpFD(FileDownloader):
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] %s bytes' % prevsize)
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
|
||||
args = basic_args + ['--resume']
|
||||
if retval == RD_FAILED:
|
||||
args += ['--skip', '1']
|
||||
args = [encodeArgument(a) for a in args]
|
||||
retval = run_rtmpdump(args)
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
|
@@ -10,21 +10,23 @@ from ..utils import (
|
||||
)
|
||||
|
||||
|
||||
class MplayerFD(FileDownloader):
|
||||
class RtspFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
args = [
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
# Check for mplayer first
|
||||
if not check_executable('mplayer', ['-h']):
|
||||
self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
|
||||
if check_executable('mplayer', ['-h']):
|
||||
args = [
|
||||
'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
|
||||
'-dumpstream', '-dumpfile', tmpfilename, url]
|
||||
elif check_executable('mpv', ['-h']):
|
||||
args = [
|
||||
'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
|
||||
else:
|
||||
self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
|
||||
return False
|
||||
|
||||
# Download using mplayer.
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
@@ -39,5 +41,5 @@ class MplayerFD(FileDownloader):
|
||||
return True
|
||||
else:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('mplayer exited with code %d' % retval)
|
||||
self.report_error('%s exited with code %d' % (args[0], retval))
|
||||
return False
|
@@ -70,6 +70,7 @@ from .chirbit import (
|
||||
ChirbitProfileIE,
|
||||
)
|
||||
from .cinchcast import CinchcastIE
|
||||
from .cinemassacre import CinemassacreIE
|
||||
from .clipfish import ClipfishIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clipsyndicate import ClipsyndicateIE
|
||||
@@ -90,6 +91,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||
from .condenast import CondeNastIE
|
||||
from .cracked import CrackedIE
|
||||
from .criterion import CriterionIE
|
||||
from .crooksandliars import CrooksAndLiarsIE
|
||||
from .crunchyroll import (
|
||||
CrunchyrollIE,
|
||||
CrunchyrollShowPlaylistIE
|
||||
@@ -106,13 +108,16 @@ from .dbtv import DBTVIE
|
||||
from .dctp import DctpTvIE
|
||||
from .deezer import DeezerPlaylistIE
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .dotsub import DotsubIE
|
||||
from .douyutv import DouyuTVIE
|
||||
from .dreisat import DreiSatIE
|
||||
from .drbonanza import DRBonanzaIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import DRTVIE
|
||||
from .dvtv import DVTVIE
|
||||
from .dump import DumpIE
|
||||
from .dumpert import DumpertIE
|
||||
from .defense import DefenseGouvFrIE
|
||||
from .discovery import DiscoveryIE
|
||||
from .divxstage import DivxStageIE
|
||||
@@ -174,12 +179,14 @@ from .gameone import (
|
||||
GameOneIE,
|
||||
GameOnePlaylistIE,
|
||||
)
|
||||
from .gamersyde import GamersydeIE
|
||||
from .gamespot import GameSpotIE
|
||||
from .gamestar import GameStarIE
|
||||
from .gametrailers import GametrailersIE
|
||||
from .gazeta import GazetaIE
|
||||
from .gdcvault import GDCVaultIE
|
||||
from .generic import GenericIE
|
||||
from .gfycat import GfycatIE
|
||||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
@@ -249,6 +256,7 @@ from .letv import (
|
||||
LetvTvIE,
|
||||
LetvPlaylistIE
|
||||
)
|
||||
from .libsyn import LibsynIE
|
||||
from .lifenews import LifeNewsIE
|
||||
from .liveleak import LiveLeakIE
|
||||
from .livestream import (
|
||||
@@ -267,11 +275,13 @@ from .macgamestore import MacGameStoreIE
|
||||
from .mailru import MailRuIE
|
||||
from .malemotion import MalemotionIE
|
||||
from .mdr import MDRIE
|
||||
from .megavideoz import MegaVideozIE
|
||||
from .metacafe import MetacafeIE
|
||||
from .metacritic import MetacriticIE
|
||||
from .mgoon import MgoonIE
|
||||
from .minhateca import MinhatecaIE
|
||||
from .ministrygrid import MinistryGridIE
|
||||
from .miomio import MioMioIE
|
||||
from .mit import TechTVMITIE, MITIE, OCWMITIE
|
||||
from .mitele import MiTeleIE
|
||||
from .mixcloud import MixcloudIE
|
||||
@@ -307,6 +317,8 @@ from .nba import NBAIE
|
||||
from .nbc import (
|
||||
NBCIE,
|
||||
NBCNewsIE,
|
||||
NBCSportsIE,
|
||||
NBCSportsVPlayerIE,
|
||||
)
|
||||
from .ndr import NDRIE
|
||||
from .ndtv import NDTVIE
|
||||
@@ -345,6 +357,7 @@ from .npo import (
|
||||
)
|
||||
from .nrk import (
|
||||
NRKIE,
|
||||
NRKPlaylistIE,
|
||||
NRKTVIE,
|
||||
)
|
||||
from .ntvde import NTVDeIE
|
||||
@@ -364,6 +377,7 @@ from .orf import (
|
||||
from .parliamentliveuk import ParliamentLiveUKIE
|
||||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .planetaplay import PlanetaPlayIE
|
||||
@@ -379,14 +393,22 @@ from .pornhub import (
|
||||
PornHubPlaylistIE,
|
||||
)
|
||||
from .pornotube import PornotubeIE
|
||||
from .pornovoisines import PornoVoisinesIE
|
||||
from .pornoxo import PornoXOIE
|
||||
from .primesharetv import PrimeShareTVIE
|
||||
from .promptfile import PromptFileIE
|
||||
from .prosiebensat1 import ProSiebenSat1IE
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .qqmusic import (
|
||||
QQMusicIE,
|
||||
QQMusicSingerIE,
|
||||
QQMusicAlbumIE,
|
||||
)
|
||||
from .quickvid import QuickVidIE
|
||||
from .r7 import R7IE
|
||||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import RadioFranceIE
|
||||
from .rai import RaiIE
|
||||
@@ -405,7 +427,7 @@ from .rtlnow import RTLnowIE
|
||||
from .rtl2 import RTL2IE
|
||||
from .rtp import RTPIE
|
||||
from .rts import RTSIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE
|
||||
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
|
||||
from .ruhd import RUHDIE
|
||||
from .rutube import (
|
||||
RutubeIE,
|
||||
@@ -416,13 +438,18 @@ from .rutube import (
|
||||
)
|
||||
from .rutv import RUTVIE
|
||||
from .sandia import SandiaIE
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariCourseIE,
|
||||
)
|
||||
from .sapo import SapoIE
|
||||
from .savefrom import SaveFromIE
|
||||
from .sbs import SBSIE
|
||||
from .scivee import SciVeeIE
|
||||
from .screencast import ScreencastIE
|
||||
from .screencastomatic import ScreencastOMaticIE
|
||||
from .screenwavemedia import CinemassacreIE, ScreenwaveMediaIE, TeamFourIE
|
||||
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .servingsys import ServingSysIE
|
||||
from .sexu import SexuIE
|
||||
from .sexykarma import SexyKarmaIE
|
||||
@@ -452,9 +479,11 @@ from .soundgasm import (
|
||||
)
|
||||
from .southpark import (
|
||||
SouthParkIE,
|
||||
SouthParkEsIE,
|
||||
SouthparkDeIE,
|
||||
)
|
||||
from .space import SpaceIE
|
||||
from .spankbang import SpankBangIE
|
||||
from .spankwire import SpankwireIE
|
||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
||||
from .spiegeltv import SpiegeltvIE
|
||||
@@ -462,6 +491,7 @@ from .spike import SpikeIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .srf import SrfIE
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .ssa import SSAIE
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
@@ -522,6 +552,10 @@ from .tvp import TvpIE, TvpSeriesIE
|
||||
from .tvplay import TVPlayIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentytwotracks import (
|
||||
TwentyTwoTracksIE,
|
||||
TwentyTwoTracksGenreIE
|
||||
)
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@@ -536,12 +570,16 @@ from .udemy import (
|
||||
UdemyIE,
|
||||
UdemyCourseIE
|
||||
)
|
||||
from .udn import UDNEmbedIE
|
||||
from .ultimedia import UltimediaIE
|
||||
from .unistra import UnistraIE
|
||||
from .urort import UrortIE
|
||||
from .ustream import UstreamIE, UstreamChannelIE
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veehd import VeeHDIE
|
||||
from .veoh import VeohIE
|
||||
from .vessel import VesselIE
|
||||
from .vesti import VestiIE
|
||||
from .vevo import VevoIE
|
||||
from .vgtv import VGTVIE
|
||||
|
@@ -11,12 +11,13 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class AddAnimeIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<id>[\w_]+)(?:.*)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
|
||||
'md5': '72954ea10bc979ab5e2eb288b21425a0',
|
||||
'info_dict': {
|
||||
@@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor):
|
||||
'description': 'One Piece 606',
|
||||
'title': 'One Piece 606',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor):
|
||||
note='Confirming after redirect')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
FORMATS = ('normal', 'hq')
|
||||
quality = qualities(FORMATS)
|
||||
formats = []
|
||||
for format_id in ('normal', 'hq'):
|
||||
for format_id in FORMATS:
|
||||
rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
|
||||
video_url = self._search_regex(rex, webpage, 'video file URLx',
|
||||
fatal=False)
|
||||
@@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor):
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': video_url,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
video_title = self._og_search_title(webpage)
|
||||
|
@@ -2,10 +2,11 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class AftonbladetIE(InfoExtractor):
|
||||
_VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?P<video_id>article[0-9]+)\.ab(?:$|[?#])'
|
||||
_VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?P<id>article[0-9]+)\.ab(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab',
|
||||
'info_dict': {
|
||||
@@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor):
|
||||
formats.append({
|
||||
'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']),
|
||||
'ext': 'mp4',
|
||||
'width': fmt['width'],
|
||||
'height': fmt['height'],
|
||||
'tbr': fmt['bitrate'],
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'height': int_or_none(fmt.get('height')),
|
||||
'tbr': int_or_none(fmt.get('bitrate')),
|
||||
'protocol': 'http',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
@@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': internal_meta_json['title'],
|
||||
'formats': formats,
|
||||
'thumbnail': internal_meta_json['imageUrl'],
|
||||
'description': internal_meta_json['shortPreamble'],
|
||||
'timestamp': internal_meta_json['timePublished'],
|
||||
'duration': internal_meta_json['duration'],
|
||||
'view_count': internal_meta_json['views'],
|
||||
'thumbnail': internal_meta_json.get('imageUrl'),
|
||||
'description': internal_meta_json.get('shortPreamble'),
|
||||
'timestamp': int_or_none(internal_meta_json.get('timePublished')),
|
||||
'duration': int_or_none(internal_meta_json.get('duration')),
|
||||
'view_count': int_or_none(internal_meta_json.get('views')),
|
||||
}
|
||||
|
@@ -146,6 +146,7 @@ class ArteTVPlus7IE(InfoExtractor):
|
||||
|
||||
formats.append(format)
|
||||
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
info_dict['formats'] = formats
|
||||
|
@@ -1,12 +1,18 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import itertools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -14,6 +20,8 @@ class BambuserIE(InfoExtractor):
|
||||
IE_NAME = 'bambuser'
|
||||
_VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
|
||||
_API_KEY = '005f64509e19a868399060af746a00aa'
|
||||
_LOGIN_URL = 'https://bambuser.com/user'
|
||||
_NETRC_MACHINE = 'bambuser'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://bambuser.com/v/4050584',
|
||||
@@ -26,6 +34,9 @@ class BambuserIE(InfoExtractor):
|
||||
'duration': 3741,
|
||||
'uploader': 'pixelversity',
|
||||
'uploader_id': '344706',
|
||||
'timestamp': 1382976692,
|
||||
'upload_date': '20131028',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {
|
||||
# It doesn't respect the 'Range' header, it would download the whole video
|
||||
@@ -34,23 +45,60 @@ class BambuserIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
login_form = {
|
||||
'form_id': 'user_login',
|
||||
'op': 'Log in',
|
||||
'name': username,
|
||||
'pass': password,
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
|
||||
request.add_header('Referer', self._LOGIN_URL)
|
||||
response = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
login_error = self._html_search_regex(
|
||||
r'(?s)<div class="messages error">(.+?)</div>',
|
||||
response, 'login error', default=None)
|
||||
if login_error:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % login_error, expected=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info_url = ('http://player-c.api.bambuser.com/getVideo.json?'
|
||||
'&api_key=%s&vid=%s' % (self._API_KEY, video_id))
|
||||
info_json = self._download_webpage(info_url, video_id)
|
||||
info = json.loads(info_json)['result']
|
||||
video_id = self._match_id(url)
|
||||
|
||||
info = self._download_json(
|
||||
'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
|
||||
% (self._API_KEY, video_id), video_id)
|
||||
|
||||
error = info.get('error')
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
result = info['result']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info['title'],
|
||||
'url': info['url'],
|
||||
'thumbnail': info.get('preview'),
|
||||
'duration': int(info['length']),
|
||||
'view_count': int(info['views_total']),
|
||||
'uploader': info['username'],
|
||||
'uploader_id': info['owner']['uid'],
|
||||
'title': result['title'],
|
||||
'url': result['url'],
|
||||
'thumbnail': result.get('preview'),
|
||||
'duration': int_or_none(result.get('length')),
|
||||
'uploader': result.get('username'),
|
||||
'uploader_id': compat_str(result.get('owner', {}).get('uid')),
|
||||
'timestamp': int_or_none(result.get('created')),
|
||||
'fps': float_or_none(result.get('framerate')),
|
||||
'view_count': int_or_none(result.get('views_total')),
|
||||
'comment_count': int_or_none(result.get('comment_count')),
|
||||
}
|
||||
|
||||
|
||||
|
@@ -72,7 +72,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$',
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
|
||||
download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page')
|
||||
|
@@ -7,6 +7,7 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -30,6 +31,8 @@ class BiliBiliIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if self._search_regex(r'(此视频不存在或被删除)', webpage, 'error message', default=None):
|
||||
raise ExtractorError('The video does not exist or was deleted', expected=True)
|
||||
video_code = self._search_regex(
|
||||
r'(?s)<div itemprop="video".*?>(.*?)</div>', webpage, 'video code')
|
||||
|
||||
|
@@ -102,6 +102,15 @@ class BlipTVIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
if mobj:
|
||||
return 'http://blip.tv/a/a-' + mobj.group(1)
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
|
||||
if mobj:
|
||||
return mobj.group(1)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
lookup_id = mobj.group('lookup_id')
|
||||
@@ -172,6 +181,7 @@ class BlipTVIE(InfoExtractor):
|
||||
'width': int_or_none(media_content.get('width')),
|
||||
'height': int_or_none(media_content.get('height')),
|
||||
})
|
||||
self._check_formats(formats, video_id)
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, subtitles_urls)
|
||||
|
@@ -6,32 +6,39 @@ from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloombergIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'
|
||||
_VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P<id>[^/?#]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
|
||||
'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
|
||||
# The md5 checksum changes
|
||||
'info_dict': {
|
||||
'id': 'qurhIVlJSB6hzkVi229d8g',
|
||||
'ext': 'flv',
|
||||
'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
|
||||
'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88',
|
||||
'description': 'md5:a8ba0302912d03d246979735c17d2761',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
name = self._match_id(url)
|
||||
webpage = self._download_webpage(url, name)
|
||||
|
||||
f4m_url = self._search_regex(
|
||||
r'<source src="(https?://[^"]+\.f4m.*?)"', webpage,
|
||||
'f4m url')
|
||||
video_id = self._search_regex(r'"bmmrId":"(.+?)"', webpage, 'id')
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
|
||||
formats = []
|
||||
for stream in embed_info['streams']:
|
||||
if stream["muxing_format"] == "TS":
|
||||
formats.extend(self._extract_m3u8_formats(stream['url'], video_id))
|
||||
else:
|
||||
formats.extend(self._extract_f4m_formats(stream['url'], video_id))
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': name.split('-')[-1],
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_f4m_formats(f4m_url, name),
|
||||
'formats': formats,
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
}
|
||||
|
@@ -117,7 +117,10 @@ class BrightcoveIE(InfoExtractor):
|
||||
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
|
||||
object_str = fix_xml_ampersands(object_str)
|
||||
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
try:
|
||||
object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
|
||||
except xml.etree.ElementTree.ParseError:
|
||||
return
|
||||
|
||||
fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
|
||||
if fv_el is not None:
|
||||
@@ -183,9 +186,9 @@ class BrightcoveIE(InfoExtractor):
|
||||
(?:
|
||||
[^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
|
||||
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
|
||||
).+?</object>''',
|
||||
).+?>\s*</object>''',
|
||||
webpage)
|
||||
return [cls._build_brighcove_url(m) for m in matches]
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
99
youtube_dl/extractor/cinemassacre.py
Normal file
99
youtube_dl/extractor/cinemassacre.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from .bliptv import BlipTVIE
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'id': 'Cinemassacre-19911',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'id': 'Cinemassacre-521be8ef82b16',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
},
|
||||
{
|
||||
# blip.tv embedded video
|
||||
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
|
||||
'md5': 'ca9b3c8dd5a66f9375daeb5135f5a3de',
|
||||
'info_dict': {
|
||||
'id': '4065369',
|
||||
'ext': 'flv',
|
||||
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
|
||||
'upload_date': '20061207',
|
||||
'uploader': 'cinemassacre',
|
||||
'uploader_id': '250778',
|
||||
'timestamp': 1283233867,
|
||||
'description': 'md5:0a108c78d130676b207d0f6d029ecffd',
|
||||
}
|
||||
},
|
||||
{
|
||||
# Youtube embedded video
|
||||
'url': 'http://cinemassacre.com/2006/09/01/mckids/',
|
||||
'md5': '6eb30961fa795fedc750eac4881ad2e1',
|
||||
'info_dict': {
|
||||
'id': 'FnxsNhuikpo',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20060901',
|
||||
'uploader': 'Cinemassacre Extras',
|
||||
'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
|
||||
'uploader_id': 'Cinemassacre',
|
||||
'title': 'AVGN: McKids',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerdata_url = self._search_regex(
|
||||
[
|
||||
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
r'<iframe[^>]+src="((?:https?:)?//(?:[^.]+\.)?youtube\.com/.+?)"',
|
||||
],
|
||||
webpage, 'player data URL', default=None)
|
||||
if not playerdata_url:
|
||||
playerdata_url = BlipTVIE._extract_url(webpage)
|
||||
if not playerdata_url:
|
||||
raise ExtractorError('Unable to find player data')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(?P<title>.+?)\|', webpage, 'title')
|
||||
video_description = self._html_search_regex(
|
||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
'url': playerdata_url,
|
||||
}
|
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
class CNNIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:(?:edition|www)\.)?cnn\.com/video/(?:data/.+?|\?)/
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:cnn|hln)(?:-ap)?|(?=&)))'''
|
||||
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
|
||||
@@ -45,6 +45,12 @@ class CNNIE(InfoExtractor):
|
||||
'description': 'md5:e7223a503315c9f150acac52e76de086',
|
||||
'upload_date': '20141222',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -201,7 +201,7 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
|
||||
|
||||
uri = mMovieParams[0][1]
|
||||
# Correct cc.com in uri
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.cc.com', uri)
|
||||
uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)
|
||||
|
||||
index_url = 'http://%s.cc.com/feeds/mrss?%s' % (show_name, compat_urllib_parse.urlencode({'uri': uri}))
|
||||
idoc = self._download_xml(
|
||||
|
@@ -23,6 +23,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
age_restricted,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
compiled_regex_type,
|
||||
ExtractorError,
|
||||
@@ -324,7 +325,7 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning(errmsg)
|
||||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True):
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None):
|
||||
""" Returns a tuple (page content as string, URL handle) """
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, (compat_str, str)):
|
||||
@@ -334,14 +335,11 @@ class InfoExtractor(object):
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal)
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
||||
return (content, urlh)
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None):
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
@staticmethod
|
||||
def _guess_encoding_from_content(content_type, webpage_bytes):
|
||||
m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
|
||||
if m:
|
||||
encoding = m.group(1)
|
||||
@@ -354,6 +352,16 @@ class InfoExtractor(object):
|
||||
encoding = 'utf-16'
|
||||
else:
|
||||
encoding = 'utf-8'
|
||||
|
||||
return encoding
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||
content_type = urlh.headers.get('Content-Type', '')
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if not encoding:
|
||||
encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
|
||||
if self._downloader.params.get('dump_intermediate_pages', False):
|
||||
try:
|
||||
url = url_or_request.get_full_url()
|
||||
@@ -410,13 +418,13 @@ class InfoExtractor(object):
|
||||
|
||||
return content
|
||||
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5):
|
||||
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None):
|
||||
""" Returns the data of the page as a string """
|
||||
success = False
|
||||
try_count = 0
|
||||
while success is False:
|
||||
try:
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal)
|
||||
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
||||
success = True
|
||||
except compat_http_client.IncompleteRead as e:
|
||||
try_count += 1
|
||||
@@ -431,10 +439,10 @@ class InfoExtractor(object):
|
||||
|
||||
def _download_xml(self, url_or_request, video_id,
|
||||
note='Downloading XML', errnote='Unable to download XML',
|
||||
transform_source=None, fatal=True):
|
||||
transform_source=None, fatal=True, encoding=None):
|
||||
"""Return the xml as an xml.etree.ElementTree.Element"""
|
||||
xml_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding)
|
||||
if xml_string is False:
|
||||
return xml_string
|
||||
if transform_source:
|
||||
@@ -445,9 +453,10 @@ class InfoExtractor(object):
|
||||
note='Downloading JSON metadata',
|
||||
errnote='Unable to download JSON metadata',
|
||||
transform_source=None,
|
||||
fatal=True):
|
||||
fatal=True, encoding=None):
|
||||
json_string = self._download_webpage(
|
||||
url_or_request, video_id, note, errnote, fatal=fatal)
|
||||
url_or_request, video_id, note, errnote, fatal=fatal,
|
||||
encoding=encoding)
|
||||
if (not fatal) and json_string is False:
|
||||
return None
|
||||
return self._parse_json(
|
||||
@@ -492,7 +501,7 @@ class InfoExtractor(object):
|
||||
|
||||
# Methods for following #608
|
||||
@staticmethod
|
||||
def url_result(url, ie=None, video_id=None):
|
||||
def url_result(url, ie=None, video_id=None, video_title=None):
|
||||
"""Returns a url that points to a page that should be processed"""
|
||||
# TODO: ie should be the class used for getting the info
|
||||
video_info = {'_type': 'url',
|
||||
@@ -500,6 +509,8 @@ class InfoExtractor(object):
|
||||
'ie_key': ie}
|
||||
if video_id is not None:
|
||||
video_info['id'] = video_id
|
||||
if video_title is not None:
|
||||
video_info['title'] = video_title
|
||||
return video_info
|
||||
|
||||
@staticmethod
|
||||
@@ -546,8 +557,7 @@ class InfoExtractor(object):
|
||||
elif fatal:
|
||||
raise RegexNotFoundError('Unable to extract %s' % _name)
|
||||
else:
|
||||
self._downloader.report_warning('unable to extract %s; '
|
||||
'please report this issue on http://yt-dl.org/bug' % _name)
|
||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
@@ -698,7 +708,7 @@ class InfoExtractor(object):
|
||||
return self._html_search_meta('twitter:player', html,
|
||||
'twitter card player')
|
||||
|
||||
def _sort_formats(self, formats):
|
||||
def _sort_formats(self, formats, field_preference=None):
|
||||
if not formats:
|
||||
raise ExtractorError('No video formats found')
|
||||
|
||||
@@ -708,6 +718,9 @@ class InfoExtractor(object):
|
||||
if not f.get('ext') and 'url' in f:
|
||||
f['ext'] = determine_ext(f['url'])
|
||||
|
||||
if isinstance(field_preference, (list, tuple)):
|
||||
return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference)
|
||||
|
||||
preference = f.get('preference')
|
||||
if preference is None:
|
||||
proto = f.get('protocol')
|
||||
@@ -822,7 +835,7 @@ class InfoExtractor(object):
|
||||
(media_el.attrib.get('href') or media_el.attrib.get('url')))
|
||||
tbr = int_or_none(media_el.attrib.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])),
|
||||
'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])),
|
||||
'url': manifest_url,
|
||||
'ext': 'flv',
|
||||
'tbr': tbr,
|
||||
|
@@ -11,39 +11,65 @@ from ..utils import (
|
||||
|
||||
class CrackedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
|
||||
'md5': '4b29a5eeec292cd5eca6388c7558db9e',
|
||||
_TESTS = [{
|
||||
'url': 'http://www.cracked.com/video_19070_if-animal-actors-got-e21-true-hollywood-stories.html',
|
||||
'md5': '89b90b9824e3806ca95072c4d78f13f7',
|
||||
'info_dict': {
|
||||
'id': '19006',
|
||||
'id': '19070',
|
||||
'ext': 'mp4',
|
||||
'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
|
||||
'description': 'md5:3b909e752661db86007d10e5ec2df769',
|
||||
'timestamp': 1405659600,
|
||||
'upload_date': '20140718',
|
||||
'title': 'If Animal Actors Got E! True Hollywood Stories',
|
||||
'timestamp': 1404954000,
|
||||
'upload_date': '20140710',
|
||||
}
|
||||
}
|
||||
}, {
|
||||
# youtube embed
|
||||
'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
|
||||
'md5': 'ccd52866b50bde63a6ef3b35016ba8c7',
|
||||
'info_dict': {
|
||||
'id': 'EjI00A3rZD0',
|
||||
'ext': 'mp4',
|
||||
'title': "4 Plot Holes You Didn't Notice in Your Favorite Movies - The Spit Take",
|
||||
'description': 'md5:c603708c718b796fe6079e2b3351ffc7',
|
||||
'upload_date': '20140725',
|
||||
'uploader_id': 'Cracked',
|
||||
'uploader': 'Cracked',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
youtube_url = self._search_regex(
|
||||
r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
|
||||
webpage, 'youtube url', default=None)
|
||||
if youtube_url:
|
||||
return self.url_result(youtube_url, 'Youtube')
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'], webpage, 'video URL')
|
||||
[r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'<video\s+src="([^"]+)"'],
|
||||
webpage, 'video URL')
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
description = self._og_search_description(webpage)
|
||||
title = self._search_regex(
|
||||
[r'property="?og:title"?\s+content="([^"]+)"', r'class="?title"?>([^<]+)'],
|
||||
webpage, 'title')
|
||||
|
||||
timestamp = self._html_search_regex(r'<time datetime="([^"]+)"', webpage, 'upload date', fatal=False)
|
||||
description = self._search_regex(
|
||||
r'name="?(?:og:)?description"?\s+content="([^"]+)"',
|
||||
webpage, 'description', default=None)
|
||||
|
||||
timestamp = self._html_search_regex(
|
||||
r'"date"\s*:\s*"([^"]+)"', webpage, 'upload date', fatal=False)
|
||||
if timestamp:
|
||||
timestamp = parse_iso8601(timestamp[:-6])
|
||||
|
||||
view_count = str_to_int(self._html_search_regex(
|
||||
r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>', webpage, 'view count', fatal=False))
|
||||
r'<span\s+class="?views"? id="?viewCounts"?>([\d,\.]+) Views</span>',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._html_search_regex(
|
||||
r'<span id="commentCounts">([\d,\.]+)</span>', webpage, 'comment count', fatal=False))
|
||||
r'<span\s+id="?commentCounts"?>([\d,\.]+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
m = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', video_url)
|
||||
if m:
|
||||
|
60
youtube_dl/extractor/crooksandliars.py
Normal file
60
youtube_dl/extractor/crooksandliars.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class CrooksAndLiarsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
|
||||
'info_dict': {
|
||||
'id': '8RUoRhRi',
|
||||
'ext': 'mp4',
|
||||
'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
|
||||
'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
|
||||
'thumbnail': 're:^https?://.*\.jpg',
|
||||
'timestamp': 1428207000,
|
||||
'upload_date': '20150405',
|
||||
'uploader': 'Heather',
|
||||
'duration': 236,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
|
||||
|
||||
manifest = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
|
||||
video_id)
|
||||
|
||||
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
|
||||
|
||||
formats = [{
|
||||
'url': item['url'],
|
||||
'format_id': item['type'],
|
||||
'quality': quality(item['type']),
|
||||
} for item in manifest['flavors'] if item['mime'].startswith('video/')]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'url': url,
|
||||
'id': video_id,
|
||||
'title': manifest['title'],
|
||||
'description': manifest.get('description'),
|
||||
'thumbnail': self._proto_relative_url(manifest.get('poster')),
|
||||
'timestamp': int_or_none(manifest.get('created')),
|
||||
'uploader': manifest.get('author'),
|
||||
'duration': int_or_none(manifest.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
@@ -23,7 +23,6 @@ from ..utils import (
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
inc,
|
||||
)
|
||||
|
||||
|
||||
@@ -102,13 +101,6 @@ class CrunchyrollIE(InfoExtractor):
|
||||
|
||||
key = obfuscate_key(id)
|
||||
|
||||
class Counter:
|
||||
__value = iv
|
||||
|
||||
def next_value(self):
|
||||
temp = self.__value
|
||||
self.__value = inc(self.__value)
|
||||
return temp
|
||||
decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
|
||||
return zlib.decompress(decrypted_data)
|
||||
|
||||
@@ -271,8 +263,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
streamdata = self._download_xml(
|
||||
streamdata_req, video_id,
|
||||
note='Downloading media info for %s' % video_format)
|
||||
video_url = streamdata.find('.//host').text
|
||||
video_play_path = streamdata.find('.//file').text
|
||||
video_url = streamdata.find('./host').text
|
||||
video_play_path = streamdata.find('./file').text
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'play_path': video_play_path,
|
||||
|
@@ -7,7 +7,10 @@ from ..utils import (
|
||||
int_or_none,
|
||||
unescapeHTML,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
determine_ext,
|
||||
)
|
||||
from .senateisvp import SenateISVPIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
@@ -35,11 +38,22 @@ class CSpanIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
|
||||
'md5': '446562a736c6bf97118e389433ed88d4',
|
||||
'info_dict': {
|
||||
'id': '342759',
|
||||
'ext': 'mp4',
|
||||
'title': 'General Motors Ignition Switch Recall',
|
||||
'duration': 14848,
|
||||
'description': 'md5:70c7c3b8fa63fa60d42772440596034c'
|
||||
},
|
||||
'playlist_duration_sum': 14855,
|
||||
}, {
|
||||
# Video from senate.gov
|
||||
'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -56,7 +70,7 @@ class CSpanIE(InfoExtractor):
|
||||
# present, otherwise this is a stripped version
|
||||
r'<p class=\'initial\'>(.*?)</p>'
|
||||
],
|
||||
webpage, 'description', flags=re.DOTALL)
|
||||
webpage, 'description', flags=re.DOTALL, default=None)
|
||||
|
||||
info_url = 'http://c-spanvideo.org/videoLibrary/assets/player/ajax-player.php?os=android&html5=program&id=' + video_id
|
||||
data = self._download_json(info_url, video_id)
|
||||
@@ -68,7 +82,16 @@ class CSpanIE(InfoExtractor):
|
||||
title = find_xpath_attr(doc, './/string', 'name', 'title').text
|
||||
thumbnail = find_xpath_attr(doc, './/string', 'name', 'poster').text
|
||||
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
surl = smuggle_url(senate_isvp_url, {'force_title': title})
|
||||
return self.url_result(surl, 'SenateISVP', video_id, title)
|
||||
|
||||
files = data['video']['files']
|
||||
try:
|
||||
capfile = data['video']['capfile']['#text']
|
||||
except KeyError:
|
||||
capfile = None
|
||||
|
||||
entries = [{
|
||||
'id': '%s_%d' % (video_id, partnum + 1),
|
||||
@@ -79,11 +102,22 @@ class CSpanIE(InfoExtractor):
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': int_or_none(f.get('length', {}).get('#text')),
|
||||
'subtitles': {
|
||||
'en': [{
|
||||
'url': capfile,
|
||||
'ext': determine_ext(capfile, 'dfxp')
|
||||
}],
|
||||
} if capfile else None,
|
||||
} for partnum, f in enumerate(files)]
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
}
|
||||
if len(entries) == 1:
|
||||
entry = dict(entries[0])
|
||||
entry['id'] = video_id
|
||||
return entry
|
||||
else:
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
@@ -25,8 +25,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
||||
def _build_request(url):
|
||||
"""Build a request with the family filter disabled"""
|
||||
request = compat_urllib_request.Request(url)
|
||||
request.add_header('Cookie', 'family_filter=off')
|
||||
request.add_header('Cookie', 'ff=off')
|
||||
request.add_header('Cookie', 'family_filter=off; ff=off')
|
||||
return request
|
||||
|
||||
|
||||
@@ -112,8 +111,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
|
||||
|
||||
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
|
||||
embed_page = self._download_webpage(embed_url, video_id,
|
||||
'Downloading embed page')
|
||||
embed_request = self._build_request(embed_url)
|
||||
embed_page = self._download_webpage(
|
||||
embed_request, video_id, 'Downloading embed page')
|
||||
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
|
||||
'video info', flags=re.MULTILINE)
|
||||
info = json.loads(info)
|
||||
@@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)'
|
||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||
|
73
youtube_dl/extractor/dhm.py
Normal file
73
youtube_dl/extractor/dhm.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
class DHMIE(InfoExtractor):
|
||||
IE_DESC = 'Filmarchiv - Deutsches Historisches Museum'
|
||||
_VALID_URL = r'https?://(?:www\.)?dhm\.de/filmarchiv/(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
|
||||
'md5': '11c475f670209bf6acca0b2b7ef51827',
|
||||
'info_dict': {
|
||||
'id': 'the-marshallplan-at-work-in-west-germany',
|
||||
'ext': 'flv',
|
||||
'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
|
||||
'description': 'md5:1fabd480c153f97b07add61c44407c82',
|
||||
'duration': 660,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.dhm.de/filmarchiv/02-mapping-the-wall/peter-g/rolle-1/',
|
||||
'md5': '09890226332476a3e3f6f2cb74734aa5',
|
||||
'info_dict': {
|
||||
'id': 'rolle-1',
|
||||
'ext': 'flv',
|
||||
'title': 'ROLLE 1',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
playlist_url = self._search_regex(
|
||||
r"file\s*:\s*'([^']+)'", webpage, 'playlist url')
|
||||
|
||||
playlist = self._download_xml(playlist_url, video_id)
|
||||
|
||||
track = playlist.find(
|
||||
'./{http://xspf.org/ns/0/}trackList/{http://xspf.org/ns/0/}track')
|
||||
|
||||
video_url = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}location',
|
||||
'video url', fatal=True)
|
||||
thumbnail = xpath_text(
|
||||
track, './{http://xspf.org/ns/0/}image',
|
||||
'thumbnail')
|
||||
|
||||
title = self._search_regex(
|
||||
[r'dc:title="([^"]+)"', r'<title> »([^<]+)</title>'],
|
||||
webpage, 'title').strip()
|
||||
description = self._html_search_regex(
|
||||
r'<p><strong>Description:</strong>(.+?)</p>',
|
||||
webpage, 'description', default=None)
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'<em>Length\s*</em>\s*:\s*</strong>([^<]+)',
|
||||
webpage, 'duration', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@@ -36,7 +36,8 @@ class DotsubIE(InfoExtractor):
|
||||
if not video_url:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._search_regex(
|
||||
r'"file"\s*:\s*\'([^\']+)', webpage, 'video url')
|
||||
[r'<source[^>]+src="([^"]+)"', r'"file"\s*:\s*\'([^\']+)'],
|
||||
webpage, 'video url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
112
youtube_dl/extractor/douyutv.py
Normal file
112
youtube_dl/extractor/douyutv.py
Normal file
@@ -0,0 +1,112 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (ExtractorError, unescapeHTML)
|
||||
from ..compat import (compat_str, compat_basestring)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'info_dict': {
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:c93d6692dde6fe33809a46edcbecca44',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
'uploader_id': '431925',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.douyutv.com/85982',
|
||||
'info_dict': {
|
||||
'id': '85982',
|
||||
'display_id': '85982',
|
||||
'ext': 'flv',
|
||||
'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'uploader': 'douyu小漠',
|
||||
'uploader_id': '3769985',
|
||||
'is_live': True,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if video_id.isdigit():
|
||||
room_id = video_id
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id"\s*:\s*(\d+),', page, 'room id')
|
||||
|
||||
prefix = 'room/%s?aid=android&client_sys=android&time=%d' % (
|
||||
room_id, int(time.time()))
|
||||
|
||||
auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest()
|
||||
config = self._download_json(
|
||||
'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth),
|
||||
video_id)
|
||||
|
||||
data = config['data']
|
||||
|
||||
error_code = config.get('error', 0)
|
||||
if error_code is not 0:
|
||||
error_desc = 'Server reported error %i' % error_code
|
||||
if isinstance(data, (compat_str, compat_basestring)):
|
||||
error_desc += ': ' + data
|
||||
raise ExtractorError(error_desc, expected=True)
|
||||
|
||||
show_status = data.get('show_status')
|
||||
# 1 = live, 2 = offline
|
||||
if show_status == '2':
|
||||
raise ExtractorError(
|
||||
'Live stream is offline', expected=True)
|
||||
|
||||
base_url = data['rtmp_url']
|
||||
live_path = data['rtmp_live']
|
||||
|
||||
title = self._live_title(unescapeHTML(data['room_name']))
|
||||
description = data.get('show_details')
|
||||
thumbnail = data.get('room_src')
|
||||
|
||||
uploader = data.get('nickname')
|
||||
uploader_id = data.get('owner_uid')
|
||||
|
||||
multi_formats = data.get('rtmp_multi_bitrate')
|
||||
if not isinstance(multi_formats, dict):
|
||||
multi_formats = {}
|
||||
multi_formats['live'] = live_path
|
||||
|
||||
formats = [{
|
||||
'url': '%s/%s' % (base_url, format_path),
|
||||
'format_id': format_id,
|
||||
'preference': 1 if format_id == 'live' else 0,
|
||||
} for format_id, format_path in multi_formats.items()]
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
'is_live': True,
|
||||
}
|
@@ -3,22 +3,25 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class DreiSatIE(InfoExtractor):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
|
||||
_TEST = {
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?obj=36983',
|
||||
'md5': '9dcfe344732808dbfcc901537973c922',
|
||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
||||
'md5': 'be37228896d30a88f315b638900a026e',
|
||||
'info_dict': {
|
||||
'id': '36983',
|
||||
'id': '45918',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kaffeeland Schweiz',
|
||||
'description': 'md5:cc4424b18b75ae9948b13929a0814033',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'uploader': '3sat',
|
||||
'upload_date': '20130622'
|
||||
'upload_date': '20140913'
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor):
|
||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
|
||||
details_doc = self._download_xml(details_url, video_id, 'Downloading video details')
|
||||
|
||||
status_code = details_doc.find('./status/statuscode')
|
||||
if status_code is not None and status_code.text != 'ok':
|
||||
code = status_code.text
|
||||
if code == 'notVisibleAnymore':
|
||||
message = 'Video %s is not available' % video_id
|
||||
else:
|
||||
message = '%s returned error: %s' % (self.IE_NAME, code)
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
thumbnail_els = details_doc.findall('.//teaserimage')
|
||||
thumbnails = [{
|
||||
'width': int(te.attrib['key'].partition('x')[0]),
|
||||
|
@@ -1,3 +1,4 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor, ExtractorError
|
||||
@@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
|
||||
'md5': '4a7e1dd65cdb2643500a3f753c942f25',
|
||||
'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5',
|
||||
'md5': 'dc515a9ab50577fa14cc4e4b0265168f',
|
||||
'info_dict': {
|
||||
'id': 'partiets-mand-7-8',
|
||||
'id': 'panisk-paske-5',
|
||||
'ext': 'mp4',
|
||||
'title': 'Partiets mand (7:8)',
|
||||
'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
|
||||
'timestamp': 1403047940,
|
||||
'upload_date': '20140617',
|
||||
'duration': 1299.040,
|
||||
'title': 'Panisk Påske (5)',
|
||||
'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c',
|
||||
'timestamp': 1426984612,
|
||||
'upload_date': '20150322',
|
||||
'duration': 1455,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>Programmet er ikke længere tilgængeligt' in webpage:
|
||||
raise ExtractorError(
|
||||
'Video %s is not available' % video_id, expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-(?:material-identifier|episode-slug)="([^"]+)"',
|
||||
webpage, 'video id')
|
||||
|
@@ -28,12 +28,12 @@ class DumpIE(InfoExtractor):
|
||||
video_url = self._search_regex(
|
||||
r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL')
|
||||
|
||||
thumb = self._og_search_thumbnail(webpage)
|
||||
title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title')
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'thumbnail': thumb,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
60
youtube_dl/extractor/dumpert.py
Normal file
60
youtube_dl/extractor/dumpert.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import qualities
|
||||
|
||||
|
||||
class DumpertIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?dumpert\.nl/mediabase/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
|
||||
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
|
||||
'info_dict': {
|
||||
'id': '6646981/951bc60f',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ik heb nieuws voor je',
|
||||
'description': 'Niet schrikken hoor',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
req = compat_urllib_request.Request(url)
|
||||
req.add_header('Cookie', 'nsfw=1')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
files_base64 = self._search_regex(
|
||||
r'data-files="([^"]+)"', webpage, 'data files')
|
||||
|
||||
files = self._parse_json(
|
||||
base64.b64decode(files_base64.encode('utf-8')).decode('utf-8'),
|
||||
video_id)
|
||||
|
||||
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
|
||||
|
||||
formats = [{
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'quality': quality(format_id),
|
||||
} for format_id, video_url in files.items() if format_id != 'still']
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'title', webpage) or self._og_search_title(webpage)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage) or self._og_search_description(webpage)
|
||||
thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats
|
||||
}
|
@@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor):
|
||||
'duration': 216,
|
||||
'view_count': int,
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}]
|
||||
|
||||
def _handle_error(self, response):
|
||||
|
@@ -6,56 +6,42 @@ import json
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class EllenTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
|
||||
'md5': 'e4af06f3bf0d5f471921a18db5764642',
|
||||
_TEST = {
|
||||
'url': 'http://www.ellentv.com/videos/0-ipq1gsai/',
|
||||
'md5': '8e3c576bf2e9bfff4d76565f56f94c9c',
|
||||
'info_dict': {
|
||||
'id': '0-7jqrsr18',
|
||||
'id': '0_ipq1gsai',
|
||||
'ext': 'mp4',
|
||||
'title': 'What\'s Wrong with These Photos? A Whole Lot',
|
||||
'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6',
|
||||
'timestamp': 1406876400,
|
||||
'upload_date': '20140801',
|
||||
'title': 'Fast Fingers of Fate',
|
||||
'description': 'md5:587e79fbbd0d73b148bc596d99ce48e6',
|
||||
'timestamp': 1428035648,
|
||||
'upload_date': '20150403',
|
||||
'uploader_id': 'batchUser',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ellentube.com/videos/0-dvzmabd5/',
|
||||
'md5': '98238118eaa2bbdf6ad7f708e3e4f4eb',
|
||||
'info_dict': {
|
||||
'id': '0-dvzmabd5',
|
||||
'ext': 'mp4',
|
||||
'title': '1 year old twin sister makes her brother laugh',
|
||||
'description': '1 year old twin sister makes her brother laugh',
|
||||
'timestamp': 1419542075,
|
||||
'upload_date': '20141225',
|
||||
}
|
||||
}]
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_url = self._html_search_meta('VideoURL', webpage, 'url')
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'pageName\s*=\s*"([^"]+)"', webpage, 'title')
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description') or self._og_search_description(webpage)
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<span class="publish-date"><time datetime="([^"]+)">',
|
||||
webpage, 'timestamp'))
|
||||
webpage = self._download_webpage(
|
||||
'http://widgets.ellentube.com/videos/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
}
|
||||
partner_id = self._search_regex(
|
||||
r"var\s+partnerId\s*=\s*'([^']+)", webpage, 'partner id')
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
[r'id="kaltura_player_([^"]+)"',
|
||||
r"_wb_entry_id\s*:\s*'([^']+)",
|
||||
r'data-kaltura-entry-id="([^"]+)'],
|
||||
webpage, 'kaltura id')
|
||||
|
||||
return self.url_result('kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura')
|
||||
|
||||
|
||||
class EllenTVClipsIE(InfoExtractor):
|
||||
@@ -67,7 +53,7 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
'id': 'meryl-streep-vanessa-hudgens',
|
||||
'title': 'Meryl Streep, Vanessa Hudgens',
|
||||
},
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 7,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -91,4 +77,8 @@ class EllenTVClipsIE(InfoExtractor):
|
||||
raise ExtractorError('Failed to download JSON', cause=ve)
|
||||
|
||||
def _extract_entries(self, playlist):
|
||||
return [self.url_result(item['url'], 'EllenTV') for item in playlist]
|
||||
return [
|
||||
self.url_result(
|
||||
'kaltura:%s:%s' % (item['kaltura_partner_id'], item['kaltura_entry_id']),
|
||||
'Kaltura')
|
||||
for item in playlist]
|
||||
|
@@ -1,11 +1,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class EroProfileIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
|
||||
_NETRC_MACHINE = 'eroprofile'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
|
||||
'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
|
||||
'info_dict': {
|
||||
@@ -16,13 +22,55 @@ class EroProfileIE(InfoExtractor):
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
|
||||
'md5': '1baa9602ede46ce904c431f5418d8916',
|
||||
'info_dict': {
|
||||
'id': '1133519',
|
||||
'ext': 'm4v',
|
||||
'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
return
|
||||
|
||||
query = compat_urllib_parse.urlencode({
|
||||
'username': username,
|
||||
'password': password,
|
||||
'url': 'http://www.eroprofile.com/',
|
||||
})
|
||||
login_url = self._LOGIN_URL + query
|
||||
login_page = self._download_webpage(login_url, None, False)
|
||||
|
||||
m = re.search(r'Your username or password was incorrect\.', login_page)
|
||||
if m:
|
||||
raise ExtractorError(
|
||||
'Wrong username and/or password.', expected=True)
|
||||
|
||||
self.report_login()
|
||||
redirect_url = self._search_regex(
|
||||
r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
|
||||
self._download_webpage(redirect_url, None, False)
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
m = re.search(r'You must be logged in to view this video\.', webpage)
|
||||
if m:
|
||||
raise ExtractorError(
|
||||
'This video requires login. Please specify a username and password and try again.', expected=True)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||
webpage, 'video id', default=None)
|
||||
|
@@ -1,128 +1,105 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
clean_html,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
def _decrypt_config(key, string):
|
||||
a = ''
|
||||
i = ''
|
||||
r = ''
|
||||
|
||||
while len(a) < (len(string) / 2):
|
||||
a += key
|
||||
|
||||
a = a[0:int(len(string) / 2)]
|
||||
|
||||
t = 0
|
||||
while t < len(string):
|
||||
i += chr(int(string[t] + string[t + 1], 16))
|
||||
t += 2
|
||||
|
||||
icko = [s for s in i]
|
||||
|
||||
for t, c in enumerate(a):
|
||||
r += chr(ord(c) ^ ord(icko[t]))
|
||||
|
||||
return r
|
||||
|
||||
|
||||
class EscapistIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
|
||||
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
|
||||
'md5': 'c6793dbda81388f4264c1ba18684a74d',
|
||||
'info_dict': {
|
||||
'id': '6618',
|
||||
'ext': 'mp4',
|
||||
'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
|
||||
'uploader_id': 'the-escapist-presents',
|
||||
'uploader': 'The Escapist Presents',
|
||||
'title': "Breaking Down Baldur's Gate",
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 264,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.escapistmagazine.com/videos/view/zero-punctuation/10044-Evolve-One-vs-Multiplayer',
|
||||
'md5': 'cf8842a8a46444d241f9a9980d7874f2',
|
||||
'info_dict': {
|
||||
'id': '10044',
|
||||
'ext': 'mp4',
|
||||
'description': 'This week, Zero Punctuation reviews Evolve.',
|
||||
'title': 'Evolve - One vs Multiplayer',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 304,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage_req = compat_urllib_request.Request(url)
|
||||
webpage_req.add_header('User-Agent', self._USER_AGENT)
|
||||
webpage = self._download_webpage(webpage_req, video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
uploader_id = self._html_search_regex(
|
||||
r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
|
||||
webpage, 'uploader ID', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r"<h1\s+class='headline'>(.*?)</a>",
|
||||
webpage, 'uploader', fatal=False)
|
||||
description = self._html_search_meta('description', webpage)
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
imsVideo = self._parse_json(
|
||||
self._search_regex(
|
||||
r'imsVideo\.play\(({.+?})\);', webpage, 'imsVideo'),
|
||||
video_id)
|
||||
video_id = imsVideo['videoID']
|
||||
key = imsVideo['hash']
|
||||
|
||||
raw_title = self._html_search_meta('title', webpage, fatal=True)
|
||||
title = raw_title.partition(' : ')[2]
|
||||
|
||||
config_url = compat_urllib_parse.unquote(self._html_search_regex(
|
||||
r'''(?x)
|
||||
(?:
|
||||
<param\s+name="flashvars".*?\s+value="config=|
|
||||
flashvars="config=
|
||||
)
|
||||
(https?://[^"&]+)
|
||||
''',
|
||||
webpage, 'config URL'))
|
||||
quality = qualities(['lq', 'hq', 'hd'])
|
||||
|
||||
formats = []
|
||||
ad_formats = []
|
||||
for q in ['lq', 'hq', 'hd']:
|
||||
config_req = compat_urllib_request.Request('http://www.escapistmagazine.com/videos/'
|
||||
'vidconfig.php?videoID=%s&hash=%s&quality=%s' % (video_id, key, 'mp4_' + q))
|
||||
config_req.add_header('Referer', url)
|
||||
config = self._download_webpage(config_req, video_id, 'Downloading video config ' + q.upper())
|
||||
|
||||
def _add_format(name, cfg_url, quality):
|
||||
cfg_req = compat_urllib_request.Request(cfg_url)
|
||||
cfg_req.add_header('User-Agent', self._USER_AGENT)
|
||||
config = self._download_json(
|
||||
cfg_req, video_id,
|
||||
'Downloading ' + name + ' configuration',
|
||||
'Unable to download ' + name + ' configuration',
|
||||
transform_source=js_to_json)
|
||||
data = json.loads(_decrypt_config(key, config))
|
||||
|
||||
playlist = config['playlist']
|
||||
for p in playlist:
|
||||
if p.get('eventCategory') == 'Video':
|
||||
ar = formats
|
||||
elif p.get('eventCategory') == 'Video Postroll':
|
||||
ar = ad_formats
|
||||
else:
|
||||
continue
|
||||
title = clean_html(data['videoData']['title'])
|
||||
duration = data['videoData']['duration'] / 1000
|
||||
|
||||
ar.append({
|
||||
'url': p['url'],
|
||||
'format_id': name,
|
||||
'quality': quality,
|
||||
'http_headers': {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
},
|
||||
})
|
||||
for i, v in enumerate(data['files']['videos']):
|
||||
|
||||
_add_format('normal', config_url, quality=0)
|
||||
hq_url = (config_url +
|
||||
('&hq=1' if '?' in config_url else config_url + '?hq=1'))
|
||||
try:
|
||||
_add_format('hq', hq_url, quality=1)
|
||||
except ExtractorError:
|
||||
pass # That's fine, we'll just use normal quality
|
||||
self._sort_formats(formats)
|
||||
formats.append({
|
||||
'url': v,
|
||||
'format_id': determine_ext(v) + '_' + q + str(i),
|
||||
'quality': quality(q),
|
||||
})
|
||||
|
||||
if '/escapist/sales-marketing/' in formats[-1]['url']:
|
||||
raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
|
||||
|
||||
res = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': title,
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': description,
|
||||
'description': self._og_search_description(webpage),
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
if self._downloader.params.get('include_ads') and ad_formats:
|
||||
self._sort_formats(ad_formats)
|
||||
ad_res = {
|
||||
'id': '%s-ad' % video_id,
|
||||
'title': '%s (Postroll)' % title,
|
||||
'formats': ad_formats,
|
||||
}
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': [res, ad_res],
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
}
|
||||
|
||||
return res
|
||||
|
@@ -24,8 +24,12 @@ class FacebookIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:\w+\.)?facebook\.com/
|
||||
(?:[^#]*?\#!/)?
|
||||
(?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=(?P<id>[0-9]+)
|
||||
(?:
|
||||
(?:video/video\.php|photo\.php|video\.php|video/embed)\?(?:.*?)
|
||||
(?:v|video_id)=|
|
||||
[^/]+/videos/(?:[^/]+/)?
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?:.*)'''
|
||||
_LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
|
||||
_CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
|
||||
@@ -50,6 +54,12 @@ class FacebookIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _login(self):
|
||||
|
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unescapeHTML,
|
||||
find_xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
@@ -29,25 +30,31 @@ class FlickrIE(InfoExtractor):
|
||||
video_id = mobj.group('id')
|
||||
video_uploader_id = mobj.group('uploader_id')
|
||||
webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
|
||||
webpage = self._download_webpage(webpage_url, video_id)
|
||||
req = compat_urllib_request.Request(webpage_url)
|
||||
req.add_header(
|
||||
'User-Agent',
|
||||
# it needs a more recent version
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20150101 Firefox/38.0 (Chrome)')
|
||||
webpage = self._download_webpage(req, video_id)
|
||||
|
||||
secret = self._search_regex(r"photo_secret: '(\w+)'", webpage, 'secret')
|
||||
secret = self._search_regex(r'secret"\s*:\s*"(\w+)"', webpage, 'secret')
|
||||
|
||||
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
|
||||
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
|
||||
first_xml = self._download_xml(first_url, video_id, 'Downloading first data webpage')
|
||||
|
||||
node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
|
||||
first_xml, 'node_id')
|
||||
node_id = find_xpath_attr(
|
||||
first_xml, './/{http://video.yahoo.com/YEP/1.0/}Item', 'id',
|
||||
'id').text
|
||||
|
||||
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
|
||||
second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
|
||||
second_xml = self._download_xml(second_url, video_id, 'Downloading second data webpage')
|
||||
|
||||
self.report_extraction(video_id)
|
||||
|
||||
mobj = re.search(r'<STREAM APP="(.+?)" FULLPATH="(.+?)"', second_xml)
|
||||
if mobj is None:
|
||||
stream = second_xml.find('.//STREAM')
|
||||
if stream is None:
|
||||
raise ExtractorError('Unable to extract video url')
|
||||
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
|
||||
video_url = stream.attrib['APP'] + stream.attrib['FULLPATH']
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -6,14 +6,21 @@ from .common import InfoExtractor
|
||||
|
||||
class FootyRoomIE(InfoExtractor):
|
||||
_VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
|
||||
'info_dict': {
|
||||
'id': 'schalke-04-0-2-real-madrid-2015-02',
|
||||
'title': 'Schalke 04 0 – 2 Real Madrid',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://footyroom.com/georgia-0-2-germany-2015-03/',
|
||||
'info_dict': {
|
||||
'id': 'georgia-0-2-germany-2015-03',
|
||||
'title': 'Georgia 0 – 2 Germany',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -36,6 +43,7 @@ class FootyRoomIE(InfoExtractor):
|
||||
r'data-config="([^"]+)"', payload,
|
||||
'playwire url', default=None)
|
||||
if playwire_url:
|
||||
entries.append(self.url_result(playwire_url, 'Playwire'))
|
||||
entries.append(self.url_result(self._proto_relative_url(
|
||||
playwire_url, 'http:'), 'Playwire'))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||
|
@@ -14,7 +14,9 @@ from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
parse_duration,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -50,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = video['format']
|
||||
if video_url.endswith('.f4m'):
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'f4m':
|
||||
if georestricted:
|
||||
# See https://github.com/rg3/youtube-dl/issues/3963
|
||||
# m3u8 urls work fine
|
||||
@@ -60,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path,
|
||||
video_id, 'Downloading f4m manifest token', fatal=False)
|
||||
if f4m_url:
|
||||
f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
|
||||
for f4m_format in f4m_formats:
|
||||
f4m_format['preference'] = 1
|
||||
formats.extend(f4m_formats)
|
||||
elif video_url.endswith('.m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4'))
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id))
|
||||
elif video_url.startswith('rtmp'):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
@@ -86,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
'title': info['titre'],
|
||||
'description': clean_html(info['synopsis']),
|
||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
||||
'duration': parse_duration(info['duree']),
|
||||
'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']),
|
||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
||||
'formats': formats,
|
||||
}
|
||||
@@ -260,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor):
|
||||
_VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553',
|
||||
'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6',
|
||||
'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511',
|
||||
'md5': '9b88dc156781c4dbebd4c3e066e0b1d6',
|
||||
'info_dict': {
|
||||
'id': 'EV_22853',
|
||||
'id': 'EV_50111',
|
||||
'ext': 'flv',
|
||||
'title': 'Dans les jardins de William Christie - Le Camus',
|
||||
'description': 'md5:4710c82315c40f0c865ca8b9a68b5299',
|
||||
'upload_date': '20140829',
|
||||
'timestamp': 1409317200,
|
||||
'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne",
|
||||
'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9',
|
||||
'upload_date': '20150320',
|
||||
'timestamp': 1426892400,
|
||||
'duration': 2760.9,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
name = mobj.group('name')
|
||||
|
||||
webpage = self._download_webpage(url, name)
|
||||
|
||||
if ">Ce live n'est plus disponible en replay<" in webpage:
|
||||
raise ExtractorError('Video %s is not available' % name, expected=True)
|
||||
|
||||
video_id, catalogue = self._search_regex(
|
||||
r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@')
|
||||
|
||||
|
70
youtube_dl/extractor/gamersyde.py
Normal file
70
youtube_dl/extractor/gamersyde.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
remove_start,
|
||||
)
|
||||
|
||||
|
||||
class GamersydeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
|
||||
_TEST = {
|
||||
'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
|
||||
'md5': 'f38d400d32f19724570040d5ce3a505f',
|
||||
'info_dict': {
|
||||
'id': '34371',
|
||||
'ext': 'mp4',
|
||||
'duration': 372,
|
||||
'title': 'Bloodborne - Birth of a hero',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playlist = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
|
||||
display_id, transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for source in playlist['sources']:
|
||||
video_url = source.get('file')
|
||||
if not video_url:
|
||||
continue
|
||||
format_id = source.get('label')
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
}
|
||||
m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
|
||||
if m:
|
||||
f.update({
|
||||
'height': int(m.group('height')),
|
||||
'fps': int(m.group('fps')),
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = remove_start(playlist['title'], '%s - ' % video_id)
|
||||
thumbnail = playlist.get('image')
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
@@ -11,7 +11,7 @@ from ..utils import remove_end
|
||||
|
||||
|
||||
class GDCVaultIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?'
|
||||
_NETRC_MACHINE = 'gdcvault'
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -19,6 +19,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'md5': '7ce8388f544c88b7ac11c7ab1b593704',
|
||||
'info_dict': {
|
||||
'id': '1019721',
|
||||
'display_id': 'Doki-Doki-Universe-Sweet-Simple',
|
||||
'ext': 'mp4',
|
||||
'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
|
||||
}
|
||||
@@ -27,6 +28,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
|
||||
'info_dict': {
|
||||
'id': '1015683',
|
||||
'display_id': 'Embracing-the-Dark-Art-of',
|
||||
'ext': 'flv',
|
||||
'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
|
||||
},
|
||||
@@ -39,10 +41,15 @@ class GDCVaultIE(InfoExtractor):
|
||||
'md5': 'a5eb77996ef82118afbbe8e48731b98e',
|
||||
'info_dict': {
|
||||
'id': '1015301',
|
||||
'display_id': 'Thexder-Meets-Windows-95-or',
|
||||
'ext': 'flv',
|
||||
'title': 'Thexder Meets Windows 95, or Writing Great Games in the Windows 95 Environment',
|
||||
},
|
||||
'skip': 'Requires login',
|
||||
},
|
||||
{
|
||||
'url': 'http://gdcvault.com/play/1020791/',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
@@ -90,7 +97,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
})
|
||||
return video_formats
|
||||
|
||||
def _login(self, webpage_url, video_id):
|
||||
def _login(self, webpage_url, display_id):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
|
||||
@@ -107,9 +114,9 @@ class GDCVaultIE(InfoExtractor):
|
||||
|
||||
request = compat_urllib_request.Request(login_url, compat_urllib_parse.urlencode(login_form))
|
||||
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
self._download_webpage(request, video_id, 'Logging in')
|
||||
start_page = self._download_webpage(webpage_url, video_id, 'Getting authenticated video page')
|
||||
self._download_webpage(logout_url, video_id, 'Logging out')
|
||||
self._download_webpage(request, display_id, 'Logging in')
|
||||
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
|
||||
self._download_webpage(logout_url, display_id, 'Logging out')
|
||||
|
||||
return start_page
|
||||
|
||||
@@ -117,8 +124,10 @@ class GDCVaultIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('name') or video_id
|
||||
|
||||
webpage_url = 'http://www.gdcvault.com/play/' + video_id
|
||||
start_page = self._download_webpage(webpage_url, video_id)
|
||||
start_page = self._download_webpage(webpage_url, display_id)
|
||||
|
||||
direct_url = self._search_regex(
|
||||
r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
|
||||
@@ -131,6 +140,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'ext': 'flv',
|
||||
'title': title,
|
||||
@@ -141,7 +151,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
start_page, 'xml root', default=None)
|
||||
if xml_root is None:
|
||||
# Probably need to authenticate
|
||||
login_res = self._login(webpage_url, video_id)
|
||||
login_res = self._login(webpage_url, display_id)
|
||||
if login_res is None:
|
||||
self.report_warning('Could not login.')
|
||||
else:
|
||||
@@ -159,7 +169,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
xml_name = self._html_search_regex(r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>', start_page, 'xml filename')
|
||||
|
||||
xml_decription_url = xml_root + 'xml/' + xml_name
|
||||
xml_description = self._download_xml(xml_decription_url, video_id)
|
||||
xml_description = self._download_xml(xml_decription_url, display_id)
|
||||
|
||||
video_title = xml_description.find('./metadata/title').text
|
||||
video_formats = self._parse_mp4(xml_description)
|
||||
@@ -168,6 +178,7 @@ class GDCVaultIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': video_title,
|
||||
'formats': video_formats,
|
||||
}
|
||||
|
@@ -29,10 +29,14 @@ from ..utils import (
|
||||
xpath_text,
|
||||
)
|
||||
from .brightcove import BrightcoveIE
|
||||
from .nbc import NBCSportsVPlayerIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .smotri import SmotriIE
|
||||
from .condenast import CondeNastIE
|
||||
from .udn import UDNEmbedIE
|
||||
from .senateisvp import SenateISVPIE
|
||||
from .bliptv import BlipTVIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@@ -527,6 +531,17 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Viddler'],
|
||||
},
|
||||
# Libsyn embed
|
||||
{
|
||||
'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
|
||||
'info_dict': {
|
||||
'id': '3377616',
|
||||
'ext': 'mp3',
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
},
|
||||
# jwplayer YouTube
|
||||
{
|
||||
'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
|
||||
@@ -602,13 +617,34 @@ class GenericIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '100183293',
|
||||
'ext': 'mp4',
|
||||
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
|
||||
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
|
||||
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 694,
|
||||
'age_limit': 0,
|
||||
},
|
||||
},
|
||||
# Playwire embed
|
||||
{
|
||||
'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
|
||||
'info_dict': {
|
||||
'id': '3519514',
|
||||
'ext': 'mp4',
|
||||
'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
|
||||
'thumbnail': 're:^https?://.*\.png$',
|
||||
'duration': 45.115,
|
||||
},
|
||||
},
|
||||
# 5min embed
|
||||
{
|
||||
'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
|
||||
'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
|
||||
'info_dict': {
|
||||
'id': '518726732',
|
||||
'ext': 'mp4',
|
||||
'title': 'Facebook Creates "On This Day" | Crunch Report',
|
||||
},
|
||||
},
|
||||
# RSS feed with enclosure
|
||||
{
|
||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||
@@ -618,6 +654,81 @@ class GenericIE(InfoExtractor):
|
||||
'upload_date': '20150228',
|
||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||
}
|
||||
},
|
||||
# Crooks and Liars embed
|
||||
{
|
||||
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
|
||||
'info_dict': {
|
||||
'id': '8RUoRhRi',
|
||||
'ext': 'mp4',
|
||||
'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
|
||||
'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
|
||||
'timestamp': 1428207000,
|
||||
'upload_date': '20150405',
|
||||
'uploader': 'Heather',
|
||||
},
|
||||
},
|
||||
# Crooks and Liars external embed
|
||||
{
|
||||
'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
|
||||
'info_dict': {
|
||||
'id': 'MTE3MjUtMzQ2MzA',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
|
||||
'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
|
||||
'timestamp': 1265032391,
|
||||
'upload_date': '20100201',
|
||||
'uploader': 'Heather',
|
||||
},
|
||||
},
|
||||
# NBC Sports vplayer embed
|
||||
{
|
||||
'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
|
||||
'info_dict': {
|
||||
'id': 'ln7x1qSThw4k',
|
||||
'ext': 'flv',
|
||||
'title': "PFT Live: New leader in the 'new-look' defense",
|
||||
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
|
||||
},
|
||||
},
|
||||
# UDN embed
|
||||
{
|
||||
'url': 'http://www.udn.com/news/story/7314/822787',
|
||||
'md5': 'fd2060e988c326991037b9aff9df21a6',
|
||||
'info_dict': {
|
||||
'id': '300346',
|
||||
'ext': 'mp4',
|
||||
'title': '中一中男師變性 全校師生力挺',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
}
|
||||
},
|
||||
# Ooyala embed
|
||||
{
|
||||
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
|
||||
'info_dict': {
|
||||
'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',
|
||||
'ext': 'mp4',
|
||||
'description': 'VIDEO: Index/Match versus VLOOKUP.',
|
||||
'title': 'This is what separates the Excel masters from the wannabes',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
# Contains a SMIL manifest
|
||||
{
|
||||
'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
|
||||
'info_dict': {
|
||||
'id': 'file',
|
||||
'ext': 'flv',
|
||||
'title': '+ Football: Lottery Champions League Europe',
|
||||
'uploader': 'www.telewebion.com',
|
||||
},
|
||||
'params': {
|
||||
# rtmpe downloads
|
||||
'skip_download': True,
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
@@ -963,12 +1074,9 @@ class GenericIE(InfoExtractor):
|
||||
}
|
||||
|
||||
# Look for embedded blip.tv player
|
||||
mobj = re.search(r'<meta\s[^>]*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV')
|
||||
mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage)
|
||||
if mobj:
|
||||
return self.url_result(mobj.group(1), 'BlipTV')
|
||||
bliptv_url = BlipTVIE._extract_url(webpage)
|
||||
if bliptv_url:
|
||||
return self.url_result(bliptv_url, 'BlipTV')
|
||||
|
||||
# Look for embedded condenast player
|
||||
matches = re.findall(
|
||||
@@ -1006,10 +1114,24 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for NYTimes player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
|
||||
webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Libsyn player
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for Ooyala videos
|
||||
mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
|
||||
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
|
||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage))
|
||||
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
|
||||
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
|
||||
if mobj is not None:
|
||||
return OoyalaIE._build_url_result(mobj.group('ec'))
|
||||
|
||||
@@ -1212,6 +1334,41 @@ class GenericIE(InfoExtractor):
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Pladform')
|
||||
|
||||
# Look for Playwire embeds
|
||||
mobj = re.search(
|
||||
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for 5min embeds
|
||||
mobj = re.search(
|
||||
r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
|
||||
|
||||
# Look for Crooks and Liars embeds
|
||||
mobj = re.search(
|
||||
r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'))
|
||||
|
||||
# Look for NBC Sports VPlayer embeds
|
||||
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
|
||||
if nbc_sports_url:
|
||||
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||
|
||||
# Look for UDN embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
|
||||
if mobj is not None:
|
||||
return self.url_result(
|
||||
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
|
||||
|
||||
# Look for Senate ISVP iframe
|
||||
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
|
||||
if senate_isvp_url:
|
||||
return self.url_result(surl, 'SenateISVP')
|
||||
|
||||
def check_video(vurl):
|
||||
if YoutubeIE.suitable(vurl):
|
||||
return True
|
||||
@@ -1268,10 +1425,16 @@ class GenericIE(InfoExtractor):
|
||||
# HTML5 video
|
||||
found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
|
||||
if not found:
|
||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||
found = re.search(
|
||||
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)',
|
||||
r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
|
||||
webpage)
|
||||
if not found:
|
||||
# Look also in Refresh HTTP header
|
||||
refresh_header = head_response.headers.get('Refresh')
|
||||
if refresh_header:
|
||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||
if found:
|
||||
new_url = found.group(1)
|
||||
self.report_following_redirect(new_url)
|
||||
@@ -1295,13 +1458,22 @@ class GenericIE(InfoExtractor):
|
||||
# here's a fun little line of code for you:
|
||||
video_id = os.path.splitext(video_id)[0]
|
||||
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
if determine_ext(video_url) == 'smil':
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': self._extract_smil_formats(video_url, video_id),
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
else:
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': video_uploader,
|
||||
'title': video_title,
|
||||
'age_limit': age_limit,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
90
youtube_dl/extractor/gfycat.py
Normal file
90
youtube_dl/extractor/gfycat.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
qualities,
|
||||
)
|
||||
|
||||
|
||||
class GfycatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
|
||||
'info_dict': {
|
||||
'id': 'DeadlyDecisiveGermanpinscher',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ghost in the Shell',
|
||||
'timestamp': 1410656006,
|
||||
'upload_date': '20140914',
|
||||
'uploader': 'anonymous',
|
||||
'duration': 10.4,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
gfy = self._download_json(
|
||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
||||
video_id, 'Downloading video info')['gfyItem']
|
||||
|
||||
title = gfy.get('title') or gfy['gfyName']
|
||||
description = gfy.get('description')
|
||||
timestamp = int_or_none(gfy.get('createDate'))
|
||||
uploader = gfy.get('userName')
|
||||
view_count = int_or_none(gfy.get('views'))
|
||||
like_count = int_or_none(gfy.get('likes'))
|
||||
dislike_count = int_or_none(gfy.get('dislikes'))
|
||||
age_limit = 18 if gfy.get('nsfw') == '1' else 0
|
||||
|
||||
width = int_or_none(gfy.get('width'))
|
||||
height = int_or_none(gfy.get('height'))
|
||||
fps = int_or_none(gfy.get('frameRate'))
|
||||
num_frames = int_or_none(gfy.get('numFrames'))
|
||||
|
||||
duration = float_or_none(num_frames, fps) if num_frames and fps else None
|
||||
|
||||
categories = gfy.get('tags') or gfy.get('extraLemmas') or []
|
||||
|
||||
FORMATS = ('gif', 'webm', 'mp4')
|
||||
quality = qualities(FORMATS)
|
||||
|
||||
formats = []
|
||||
for format_id in FORMATS:
|
||||
video_url = gfy.get('%sUrl' % format_id)
|
||||
if not video_url:
|
||||
continue
|
||||
filesize = gfy.get('%sSize' % format_id)
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': width,
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
'filesize': filesize,
|
||||
'quality': quality(format_id),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'categories': categories,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
@@ -15,10 +15,10 @@ from ..utils import (
|
||||
|
||||
|
||||
class GorillaVidIE(InfoExtractor):
|
||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in and fastvideo.in'
|
||||
IE_DESC = 'GorillaVid.in, daclips.in, movpod.in, fastvideo.in and realvid.net'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?P<host>(?:www\.)?
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in))/
|
||||
(?:daclips\.in|gorillavid\.in|movpod\.in|fastvideo\.in|realvid\.net))/
|
||||
(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
|
||||
'''
|
||||
|
||||
@@ -61,6 +61,15 @@ class GorillaVidIE(InfoExtractor):
|
||||
'title': 'Man of Steel - Trailer',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://realvid.net/ctn2y6p2eviw',
|
||||
'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
|
||||
'info_dict': {
|
||||
'id': 'ctn2y6p2eviw',
|
||||
'ext': 'flv',
|
||||
'title': 'rdx 1955',
|
||||
'thumbnail': 're:http://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://movpod.in/0wguyyxi1yca',
|
||||
'only_matching': True,
|
||||
@@ -97,7 +106,7 @@ class GorillaVidIE(InfoExtractor):
|
||||
webpage = self._download_webpage(req, video_id, 'Downloading video page')
|
||||
|
||||
title = self._search_regex(
|
||||
r'style="z-index: [0-9]+;">([^<]+)</span>',
|
||||
[r'style="z-index: [0-9]+;">([^<]+)</span>', r'>Watch (.+) '],
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)
|
||||
video_url = self._search_regex(
|
||||
r'file\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'file url')
|
||||
|
@@ -140,9 +140,9 @@ class GroovesharkIE(InfoExtractor):
|
||||
|
||||
if webpage is not None:
|
||||
o = GroovesharkHtmlParser.extract_object_tags(webpage)
|
||||
return (webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed'])
|
||||
return webpage, [x for x in o if x['attrs']['id'] == 'jsPlayerEmbed']
|
||||
|
||||
return (webpage, None)
|
||||
return webpage, None
|
||||
|
||||
def _real_initialize(self):
|
||||
self.ts = int(time.time() * 1000) # timestamp in millis
|
||||
@@ -154,7 +154,7 @@ class GroovesharkIE(InfoExtractor):
|
||||
swf_referer = None
|
||||
if self.do_playerpage_request:
|
||||
(_, player_objs) = self._get_playerpage(url)
|
||||
if player_objs is not None:
|
||||
if player_objs:
|
||||
swf_referer = self._build_swf_referer(url, player_objs[0])
|
||||
self.to_screen('SWF Referer: %s' % swf_referer)
|
||||
|
||||
|
@@ -10,6 +10,7 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
compat_str,
|
||||
determine_ext,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,7 +43,8 @@ class HitboxIE(InfoExtractor):
|
||||
def _extract_metadata(self, url, video_id):
|
||||
thumb_base = 'https://edge.sf.hitbox.tv'
|
||||
metadata = self._download_json(
|
||||
'%s/%s' % (url, video_id), video_id)
|
||||
'%s/%s' % (url, video_id), video_id,
|
||||
'Downloading metadata JSON')
|
||||
|
||||
date = 'media_live_since'
|
||||
media_type = 'livestream'
|
||||
@@ -87,21 +89,41 @@ class HitboxIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
player_config = self._download_json(
|
||||
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
formats = []
|
||||
for video in player_config['clip']['bitrates']:
|
||||
label = video.get('label')
|
||||
if label == 'Auto':
|
||||
continue
|
||||
video_url = video.get('url')
|
||||
if not video_url:
|
||||
continue
|
||||
bitrate = int_or_none(video.get('bitrate'))
|
||||
if determine_ext(video_url) == 'm3u8':
|
||||
if not video_url.startswith('http'):
|
||||
continue
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'tbr': bitrate,
|
||||
'format_note': label,
|
||||
'protocol': 'm3u8_native',
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'tbr': bitrate,
|
||||
'format_note': label,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._extract_metadata(
|
||||
'https://www.hitbox.tv/api/media/video',
|
||||
video_id)
|
||||
|
||||
player_config = self._download_json(
|
||||
'https://www.hitbox.tv/api/player/config/video/%s' % video_id,
|
||||
video_id)
|
||||
|
||||
clip = player_config.get('clip')
|
||||
video_url = clip.get('url')
|
||||
res = clip.get('bitrates', [])[0].get('label')
|
||||
|
||||
metadata['resolution'] = res
|
||||
metadata['url'] = video_url
|
||||
metadata['protocol'] = 'm3u8'
|
||||
metadata['formats'] = formats
|
||||
|
||||
return metadata
|
||||
|
||||
@@ -129,10 +151,6 @@ class HitboxLiveIE(HitboxIE):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
metadata = self._extract_metadata(
|
||||
'https://www.hitbox.tv/api/media/live',
|
||||
video_id)
|
||||
|
||||
player_config = self._download_json(
|
||||
'https://www.hitbox.tv/api/player/config/live/%s' % video_id,
|
||||
video_id)
|
||||
@@ -147,20 +165,39 @@ class HitboxLiveIE(HitboxIE):
|
||||
servers.append(base_url)
|
||||
for stream in cdn.get('bitrates'):
|
||||
label = stream.get('label')
|
||||
if label != 'Auto':
|
||||
if label == 'Auto':
|
||||
continue
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
bitrate = int_or_none(stream.get('bitrate'))
|
||||
if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8':
|
||||
if not stream_url.startswith('http'):
|
||||
continue
|
||||
formats.append({
|
||||
'url': '%s/%s' % (base_url, stream.get('url')),
|
||||
'url': stream_url,
|
||||
'ext': 'mp4',
|
||||
'vbr': stream.get('bitrate'),
|
||||
'resolution': label,
|
||||
'tbr': bitrate,
|
||||
'format_note': label,
|
||||
'rtmp_live': True,
|
||||
})
|
||||
else:
|
||||
formats.append({
|
||||
'url': '%s/%s' % (base_url, stream_url),
|
||||
'ext': 'mp4',
|
||||
'tbr': bitrate,
|
||||
'rtmp_live': True,
|
||||
'format_note': host,
|
||||
'page_url': url,
|
||||
'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
metadata = self._extract_metadata(
|
||||
'https://www.hitbox.tv/api/media/live',
|
||||
video_id)
|
||||
metadata['formats'] = formats
|
||||
metadata['is_live'] = True
|
||||
metadata['title'] = self._live_title(metadata.get('title'))
|
||||
|
||||
return metadata
|
||||
|
@@ -61,7 +61,7 @@ class IGNIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||
'md5': '4e9a0bda1e5eebd31ddcf86ec0b9b3c7',
|
||||
'md5': '618fedb9c901fd086f6f093564ef8558',
|
||||
'info_dict': {
|
||||
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
||||
'ext': 'mp4',
|
||||
@@ -77,10 +77,10 @@ class IGNIE(InfoExtractor):
|
||||
def _find_video_id(self, webpage):
|
||||
res_id = [
|
||||
r'"video_id"\s*:\s*"(.*?)"',
|
||||
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
|
||||
r'data-video-id="(.+?)"',
|
||||
r'<object id="vid_(.+?)"',
|
||||
r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
|
||||
r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
|
||||
]
|
||||
return self._search_regex(res_id, webpage, 'video id')
|
||||
|
||||
|
@@ -3,13 +3,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
)
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class InstagramIE(InfoExtractor):
|
||||
_VALID_URL = r'http://instagram\.com/p/(?P<id>.*?)/'
|
||||
_VALID_URL = r'https?://instagram\.com/p/(?P<id>[\da-zA-Z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
'md5': '0d2da106a9d2631273e192b372806516',
|
||||
@@ -23,8 +21,8 @@ class InstagramIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
uploader_id = self._search_regex(r'"owner":{"username":"(.+?)"',
|
||||
webpage, 'uploader id', fatal=False)
|
||||
|
@@ -40,8 +40,10 @@ class KrasViewIE(InfoExtractor):
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
thumbnail = flashvars.get('image') or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(flashvars.get('duration'))
|
||||
width = int_or_none(self._og_search_property('video:width', webpage, 'video width'))
|
||||
height = int_or_none(self._og_search_property('video:height', webpage, 'video height'))
|
||||
width = int_or_none(self._og_search_property(
|
||||
'video:width', webpage, 'video width', default=None))
|
||||
height = int_or_none(self._og_search_property(
|
||||
'video:height', webpage, 'video height', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@@ -88,12 +88,13 @@ class LetvIE(InfoExtractor):
|
||||
play_json_req = compat_urllib_request.Request(
|
||||
'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params)
|
||||
)
|
||||
play_json_req.add_header(
|
||||
'Ytdl-request-proxy',
|
||||
self._downloader.params.get('cn_verification_proxy'))
|
||||
cn_verification_proxy = self._downloader.params.get('cn_verification_proxy')
|
||||
if cn_verification_proxy:
|
||||
play_json_req.add_header('Ytdl-request-proxy', cn_verification_proxy)
|
||||
|
||||
play_json = self._download_json(
|
||||
play_json_req,
|
||||
media_id, 'playJson data')
|
||||
media_id, 'Downloading playJson data')
|
||||
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
|
59
youtube_dl/extractor/libsyn.py
Normal file
59
youtube_dl/extractor/libsyn.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import unified_strdate
|
||||
|
||||
|
||||
class LibsynIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
|
||||
'md5': '443360ee1b58007bc3dcf09b41d093bb',
|
||||
'info_dict': {
|
||||
'id': '3377616',
|
||||
'ext': 'mp3',
|
||||
'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
|
||||
'description': 'md5:601cb790edd05908957dae8aaa866465',
|
||||
'upload_date': '20150220',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
} for media_url in set(re.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage))]
|
||||
|
||||
podcast_title = self._search_regex(
|
||||
r'<h2>([^<]+)</h2>', webpage, 'title')
|
||||
episode_title = self._search_regex(
|
||||
r'<h3>([^<]+)</h3>', webpage, 'title', default=None)
|
||||
|
||||
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'<div id="info_text_body">(.+?)</div>', webpage,
|
||||
'description', fatal=False)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
|
||||
release_date = unified_strdate(self._search_regex(
|
||||
r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': release_date,
|
||||
'formats': formats,
|
||||
}
|
@@ -21,7 +21,7 @@ from ..utils import (
|
||||
|
||||
class LivestreamIE(InfoExtractor):
|
||||
IE_NAME = 'livestream'
|
||||
_VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
|
||||
'md5': '53274c76ba7754fb0e8d072716f2292b',
|
||||
@@ -51,6 +51,9 @@ class LivestreamIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _parse_smil(self, video_id, smil_url):
|
||||
|
56
youtube_dl/extractor/megavideoz.py
Normal file
56
youtube_dl/extractor/megavideoz.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MegaVideozIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?megavideoz\.eu/video/(?P<id>[^/]+)(?:/(?P<display_id>[^/]+))?'
|
||||
_TEST = {
|
||||
'url': 'http://megavideoz.eu/video/WM6UB919XMXH/SMPTE-Universal-Film-Leader',
|
||||
'info_dict': {
|
||||
'id': '48723',
|
||||
'display_id': 'SMPTE-Universal-Film-Leader',
|
||||
'ext': 'mp4',
|
||||
'title': 'SMPTE Universal Film Leader',
|
||||
'thumbnail': 're:https?://.*?\.jpg',
|
||||
'duration': 10.93,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if any(p in webpage for p in ('>Video Not Found<', '>404 Error<')):
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
config = self._download_xml(
|
||||
self._search_regex(
|
||||
r"var\s+cnf\s*=\s*'([^']+)'", webpage, 'cnf url'),
|
||||
display_id)
|
||||
|
||||
video_url = xpath_text(config, './file', 'video url', fatal=True)
|
||||
title = xpath_text(config, './title', 'title', fatal=True)
|
||||
thumbnail = xpath_text(config, './image', 'thumbnail')
|
||||
duration = float_or_none(xpath_text(config, './duration', 'duration'))
|
||||
video_id = xpath_text(config, './mediaid', 'video id') or video_id
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration
|
||||
}
|
106
youtube_dl/extractor/miomio.py
Normal file
106
youtube_dl/extractor/miomio.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class MioMioIE(InfoExtractor):
|
||||
IE_NAME = 'miomio.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?miomio\.tv/watch/cc(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# "type=video" in flashvars
|
||||
'url': 'http://www.miomio.tv/watch/cc88912/',
|
||||
'md5': '317a5f7f6b544ce8419b784ca8edae65',
|
||||
'info_dict': {
|
||||
'id': '88912',
|
||||
'ext': 'flv',
|
||||
'title': '【SKY】字幕 铠武昭和VS平成 假面骑士大战FEAT战队 魔星字幕组 字幕',
|
||||
'duration': 5923,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.miomio.tv/watch/cc184024/',
|
||||
'info_dict': {
|
||||
'id': '43729',
|
||||
'title': '《动漫同人插画绘制》',
|
||||
},
|
||||
'playlist_mincount': 86,
|
||||
'skip': 'This video takes time too long for retrieving the URL',
|
||||
}, {
|
||||
'url': 'http://www.miomio.tv/watch/cc173113/',
|
||||
'info_dict': {
|
||||
'id': '173113',
|
||||
'title': 'The New Macbook 2015 上手试玩与简评'
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_meta(
|
||||
'description', webpage, 'title', fatal=True)
|
||||
|
||||
mioplayer_path = self._search_regex(
|
||||
r'src="(/mioplayer/[^"]+)"', webpage, 'ref_path')
|
||||
|
||||
xml_config = self._search_regex(
|
||||
r'flashvars="type=(?:sina|video)&(.+?)&',
|
||||
webpage, 'xml config')
|
||||
|
||||
# skipping the following page causes lags and eventually connection drop-outs
|
||||
self._request_webpage(
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)),
|
||||
video_id)
|
||||
|
||||
# the following xml contains the actual configuration information on the video file(s)
|
||||
vid_config = self._download_xml(
|
||||
'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/sina.php?{0}'.format(xml_config),
|
||||
video_id)
|
||||
|
||||
http_headers = {
|
||||
'Referer': 'http://www.miomio.tv%s' % mioplayer_path,
|
||||
}
|
||||
|
||||
if not int_or_none(xpath_text(vid_config, 'timelength')):
|
||||
raise ExtractorError('Unable to load videos!', expected=True)
|
||||
|
||||
entries = []
|
||||
for f in vid_config.findall('./durl'):
|
||||
segment_url = xpath_text(f, 'url', 'video url')
|
||||
if not segment_url:
|
||||
continue
|
||||
order = xpath_text(f, 'order', 'order')
|
||||
segment_id = video_id
|
||||
segment_title = title
|
||||
if order:
|
||||
segment_id += '-%s' % order
|
||||
segment_title += ' part %s' % order
|
||||
entries.append({
|
||||
'id': segment_id,
|
||||
'url': segment_url,
|
||||
'title': segment_title,
|
||||
'duration': int_or_none(xpath_text(f, 'length', 'duration'), 1000),
|
||||
'http_headers': http_headers,
|
||||
})
|
||||
|
||||
if len(entries) == 1:
|
||||
segment = entries[0]
|
||||
segment['id'] = video_id
|
||||
segment['title'] = title
|
||||
return segment
|
||||
|
||||
return {
|
||||
'_type': 'multi_video',
|
||||
'id': video_id,
|
||||
'entries': entries,
|
||||
'title': title,
|
||||
'http_headers': http_headers,
|
||||
}
|
@@ -10,7 +10,6 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
str_to_int,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -27,8 +26,6 @@ class MixcloudIE(InfoExtractor):
|
||||
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
|
||||
'uploader': 'Daniel Holbach',
|
||||
'uploader_id': 'dholbach',
|
||||
'upload_date': '20111115',
|
||||
'timestamp': 1321359578,
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@@ -37,32 +34,27 @@ class MixcloudIE(InfoExtractor):
|
||||
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
|
||||
'info_dict': {
|
||||
'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
|
||||
'ext': 'm4a',
|
||||
'title': 'Electric Relaxation vol. 3',
|
||||
'ext': 'mp3',
|
||||
'title': 'Caribou 7 inch Vinyl Mix & Chat',
|
||||
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
|
||||
'uploader': 'Daniel Drumz',
|
||||
'uploader': 'Gilles Peterson Worldwide',
|
||||
'uploader_id': 'gillespeterson',
|
||||
'thumbnail': 're:https?://.*\.jpg',
|
||||
'thumbnail': 're:https?://.*/images/',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_url(self, track_id, template_url):
|
||||
server_count = 30
|
||||
for i in range(server_count):
|
||||
url = template_url % i
|
||||
try:
|
||||
# We only want to know if the request succeed
|
||||
# don't download the whole file
|
||||
self._request_webpage(
|
||||
HEADRequest(url), track_id,
|
||||
'Checking URL %d/%d ...' % (i + 1, server_count + 1))
|
||||
return url
|
||||
except ExtractorError:
|
||||
pass
|
||||
|
||||
return None
|
||||
def _check_url(self, url, track_id, ext):
|
||||
try:
|
||||
# We only want to know if the request succeed
|
||||
# don't download the whole file
|
||||
self._request_webpage(
|
||||
HEADRequest(url), track_id,
|
||||
'Trying %s URL' % ext)
|
||||
return True
|
||||
except ExtractorError:
|
||||
return False
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@@ -75,17 +67,13 @@ class MixcloudIE(InfoExtractor):
|
||||
preview_url = self._search_regex(
|
||||
r'\s(?:data-preview-url|m-preview)="([^"]+)"', webpage, 'preview url')
|
||||
song_url = preview_url.replace('/previews/', '/c/originals/')
|
||||
template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
|
||||
final_song_url = self._get_url(track_id, template_url)
|
||||
if final_song_url is None:
|
||||
self.to_screen('Trying with m4a extension')
|
||||
template_url = template_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
final_song_url = self._get_url(track_id, template_url)
|
||||
if final_song_url is None:
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
if not self._check_url(song_url, track_id, 'mp3'):
|
||||
song_url = song_url.replace('.mp3', '.m4a').replace('originals/', 'm4a/64/')
|
||||
if not self._check_url(song_url, track_id, 'm4a'):
|
||||
raise ExtractorError('Unable to extract track url')
|
||||
|
||||
PREFIX = (
|
||||
r'<span class="play-button[^"]*?"'
|
||||
r'm-play-on-spacebar[^>]+'
|
||||
r'(?:\s+[a-zA-Z0-9-]+(?:="[^"]+")?)*?\s+')
|
||||
title = self._html_search_regex(
|
||||
PREFIX + r'm-title="([^"]+)"', webpage, 'title')
|
||||
@@ -99,26 +87,21 @@ class MixcloudIE(InfoExtractor):
|
||||
r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
like_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserLikes:([0-9]+)"',
|
||||
r'/favorites/?">([0-9]+)<'],
|
||||
r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"',
|
||||
webpage, 'like count', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
[r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
|
||||
r'/listeners/?">([0-9,.]+)</a>'],
|
||||
webpage, 'play count', fatal=False))
|
||||
timestamp = parse_iso8601(self._search_regex(
|
||||
r'<time itemprop="dateCreated" datetime="([^"]+)">',
|
||||
webpage, 'upload date', default=None))
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'url': final_song_url,
|
||||
'url': song_url,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'timestamp': timestamp,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class MLBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://m(?:lb)?\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_VALID_URL = r'https?://m(?:lb)?\.(?:[\da-z_-]+\.)?mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|(?:shared/video/embed/embed\.html|[^/]+/video/play\.jsp)\?.*?\bcontent_id=)(?P<id>n?\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
|
||||
@@ -80,6 +80,10 @@ class MLBIE(InfoExtractor):
|
||||
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@@ -9,6 +9,7 @@ from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class MonikerIE(InfoExtractor):
|
||||
@@ -40,6 +41,15 @@ class MonikerIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
orig_webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>File Not Found<' in orig_webpage:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
error = self._search_regex(
|
||||
r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
|
||||
data = dict(fields)
|
||||
|
||||
|
@@ -25,6 +25,7 @@ def _media_xml_tag(tag):
|
||||
|
||||
class MTVServicesInfoExtractor(InfoExtractor):
|
||||
_MOBILE_TEMPLATE = None
|
||||
_LANG = None
|
||||
|
||||
@staticmethod
|
||||
def _id_from_uri(uri):
|
||||
@@ -118,6 +119,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
mediagen_doc = self._download_xml(mediagen_url, video_id,
|
||||
'Downloading video urls')
|
||||
|
||||
item = mediagen_doc.find('./video/item')
|
||||
if item is not None and item.get('type') == 'text':
|
||||
message = '%s returned error: ' % self.IE_NAME
|
||||
if item.get('code') is not None:
|
||||
message += '%s - ' % item.get('code')
|
||||
message += item.text
|
||||
raise ExtractorError(message, expected=True)
|
||||
|
||||
description_node = itemdoc.find('description')
|
||||
if description_node is not None:
|
||||
description = description_node.text.strip()
|
||||
@@ -161,8 +170,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||
video_id = self._id_from_uri(uri)
|
||||
feed_url = self._get_feed_url(uri)
|
||||
data = compat_urllib_parse.urlencode({'uri': uri})
|
||||
info_url = feed_url + '?'
|
||||
if self._LANG:
|
||||
info_url += 'lang=%s&' % self._LANG
|
||||
info_url += data
|
||||
idoc = self._download_xml(
|
||||
feed_url + '?' + data, video_id,
|
||||
info_url, video_id,
|
||||
'Downloading info', transform_source=fix_xml_ampersands)
|
||||
return self.playlist_result(
|
||||
[self._get_video_info(item) for item in idoc.findall('.//item')])
|
||||
|
@@ -14,7 +14,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class NBCIE(InfoExtractor):
|
||||
_VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||
_VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
@@ -50,6 +50,57 @@ class NBCIE(InfoExtractor):
|
||||
return self.url_result(theplatform_url)
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI',
|
||||
'info_dict': {
|
||||
'id': '9CsDKds0kvHI',
|
||||
'ext': 'flv',
|
||||
'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
|
||||
'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
iframe_m = re.search(
|
||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||
if iframe_m:
|
||||
return iframe_m.group('url')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
theplatform_url = self._og_search_video_url(webpage)
|
||||
return self.url_result(theplatform_url, 'ThePlatform')
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
# Does not include https becuase its certificate is invalid
|
||||
_VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||
'info_dict': {
|
||||
'id': 'PHJSaFWbrTY9',
|
||||
'ext': 'flv',
|
||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
return self.url_result(
|
||||
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||
|
||||
|
||||
class NBCNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||
(?:video/.+?/(?P<id>\d+)|
|
||||
|
@@ -22,7 +22,7 @@ class NiconicoIE(InfoExtractor):
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||
'info_dict': {
|
||||
@@ -39,7 +39,24 @@ class NiconicoIE(InfoExtractor):
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nicovideo.jp/watch/nm14296458',
|
||||
'md5': '8db08e0158457cf852a31519fceea5bc',
|
||||
'info_dict': {
|
||||
'id': 'nm14296458',
|
||||
'ext': 'swf',
|
||||
'title': '【鏡音リン】Dance on media【オリジナル】take2!',
|
||||
'description': 'md5:',
|
||||
'uploader': 'りょうた',
|
||||
'uploader_id': '18822557',
|
||||
'upload_date': '20110429',
|
||||
'duration': 209,
|
||||
},
|
||||
'params': {
|
||||
'username': 'ydl.niconico@gmail.com',
|
||||
'password': 'youtube-dl',
|
||||
},
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
@@ -89,7 +106,7 @@ class NiconicoIE(InfoExtractor):
|
||||
if self._AUTHENTICATED:
|
||||
# Get flv info
|
||||
flv_info_webpage = self._download_webpage(
|
||||
'http://flapi.nicovideo.jp/api/getflv?v=' + video_id,
|
||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||
video_id, 'Downloading flv info')
|
||||
else:
|
||||
# Get external player info
|
||||
|
@@ -231,7 +231,10 @@ class NPOLiveIE(NPOBaseIE):
|
||||
stream_url = self._download_json(
|
||||
stream_info['stream'], display_id,
|
||||
'Downloading %s URL' % stream_type,
|
||||
transform_source=strip_jsonp)
|
||||
'Unable to download %s URL' % stream_type,
|
||||
transform_source=strip_jsonp, fatal=False)
|
||||
if not stream_url:
|
||||
continue
|
||||
if stream_type == 'hds':
|
||||
f4m_formats = self._extract_f4m_formats(stream_url, display_id)
|
||||
# f4m downloader downloads only piece of live stream
|
||||
|
@@ -14,46 +14,48 @@ from ..utils import (
|
||||
|
||||
|
||||
class NRKIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
|
||||
_VALID_URL = r'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
|
||||
'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
|
||||
'url': 'http://www.nrk.no/video/PS*150533',
|
||||
'md5': 'bccd850baebefe23b56d708a113229c2',
|
||||
'info_dict': {
|
||||
'id': '150533',
|
||||
'ext': 'flv',
|
||||
'title': 'Dompap og andre fugler i Piip-Show',
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
|
||||
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
|
||||
'duration': 263,
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
|
||||
'md5': '3471f2a51718195164e88f46bf427668',
|
||||
'url': 'http://www.nrk.no/video/PS*154915',
|
||||
'md5': '0b1493ba1aae7d9579a5ad5531bc395a',
|
||||
'info_dict': {
|
||||
'id': '154915',
|
||||
'ext': 'flv',
|
||||
'title': 'Slik høres internett ut når du er blind',
|
||||
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
|
||||
'duration': 20,
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
page = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._html_search_regex(r'<div class="nrk-video" data-nrk-id="(\d+)">', page, 'video id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'http://v7.psapi.nrk.no/mediaelement/%s' % video_id, video_id, 'Downloading media JSON')
|
||||
'http://v8.psapi.nrk.no/mediaelement/%s' % video_id,
|
||||
video_id, 'Downloading media JSON')
|
||||
|
||||
if data['usageRights']['isGeoBlocked']:
|
||||
raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected=True)
|
||||
raise ExtractorError(
|
||||
'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge',
|
||||
expected=True)
|
||||
|
||||
video_url = data['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
|
||||
video_url = data['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81'
|
||||
|
||||
duration = parse_duration(data.get('duration'))
|
||||
|
||||
images = data.get('images')
|
||||
if images:
|
||||
@@ -69,10 +71,51 @@ class NRKIE(InfoExtractor):
|
||||
'ext': 'flv',
|
||||
'title': data['title'],
|
||||
'description': data['description'],
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class NRKPlaylistIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||
'info_dict': {
|
||||
'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
|
||||
'title': 'Gjenopplev den historiske solformørkelsen',
|
||||
'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
|
||||
'info_dict': {
|
||||
'id': 'rivertonprisen-til-karin-fossum-1.12266449',
|
||||
'title': 'Rivertonprisen til Karin Fossum',
|
||||
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('nrk:%s' % video_id, 'NRK')
|
||||
for video_id in re.findall(
|
||||
r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"',
|
||||
webpage)
|
||||
]
|
||||
|
||||
playlist_title = self._og_search_title(webpage)
|
||||
playlist_description = self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
|
||||
class NRKTVIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
|
||||
|
||||
|
@@ -1,15 +1,17 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': '18a525a510f942ada2720db5f31644c0',
|
||||
'info_dict': {
|
||||
@@ -22,18 +24,21 @@ class NYTimesIE(InfoExtractor):
|
||||
'uploader': 'Brett Weiner',
|
||||
'duration': 419,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON')
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
|
||||
title = video_data['headline']
|
||||
description = video_data['summary']
|
||||
duration = video_data['duration'] / 1000.0
|
||||
description = video_data.get('summary')
|
||||
duration = float_or_none(video_data.get('duration'), 1000)
|
||||
|
||||
uploader = video_data['byline']
|
||||
timestamp = parse_iso8601(video_data['publication_date'][:-8])
|
||||
@@ -49,11 +54,11 @@ class NYTimesIE(InfoExtractor):
|
||||
formats = [
|
||||
{
|
||||
'url': video['url'],
|
||||
'format_id': video['type'],
|
||||
'vcodec': video['video_codec'],
|
||||
'width': video['width'],
|
||||
'height': video['height'],
|
||||
'filesize': get_file_size(video['fileSize']),
|
||||
'format_id': video.get('type'),
|
||||
'vcodec': video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('fileSize')),
|
||||
} for video in video_data['renditions']
|
||||
]
|
||||
self._sort_formats(formats)
|
||||
@@ -61,7 +66,8 @@ class NYTimesIE(InfoExtractor):
|
||||
thumbnails = [
|
||||
{
|
||||
'url': 'http://www.nytimes.com/%s' % image['url'],
|
||||
'resolution': '%dx%d' % (image['width'], image['height']),
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
} for image in video_data['images']
|
||||
]
|
||||
|
||||
|
@@ -210,16 +210,16 @@ class ORFIPTVIE(InfoExtractor):
|
||||
_VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://iptv.orf.at/stories/2267952',
|
||||
'md5': '26ffa4bab6dbce1eee78bbc7021016cd',
|
||||
'url': 'http://iptv.orf.at/stories/2275236/',
|
||||
'md5': 'c8b22af4718a4b4af58342529453e3e5',
|
||||
'info_dict': {
|
||||
'id': '339775',
|
||||
'id': '350612',
|
||||
'ext': 'flv',
|
||||
'title': 'Kreml-Kritiker Nawalny wieder frei',
|
||||
'description': 'md5:6f24e7f546d364dacd0e616a9e409236',
|
||||
'duration': 84.729,
|
||||
'title': 'Weitere Evakuierungen um Vulkan Calbuco',
|
||||
'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
|
||||
'duration': 68.197,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150306',
|
||||
'upload_date': '20150425',
|
||||
},
|
||||
}
|
||||
|
||||
|
78
youtube_dl/extractor/philharmoniedeparis.py
Normal file
78
youtube_dl/extractor/philharmoniedeparis.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class PhilharmonieDeParisIE(InfoExtractor):
|
||||
IE_DESC = 'Philharmonie de Paris'
|
||||
_VALID_URL = r'http://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
|
||||
'info_dict': {
|
||||
'id': '1032066',
|
||||
'ext': 'flv',
|
||||
'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
|
||||
'timestamp': 1428179400,
|
||||
'upload_date': '20150404',
|
||||
'duration': 6592.278,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
concert = self._download_xml(
|
||||
'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
|
||||
video_id).find('./concert')
|
||||
|
||||
formats = []
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': xpath_text(concert, './titre', 'title', fatal=True),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
fichiers = concert.find('./fichiers')
|
||||
stream = fichiers.attrib['serveurstream']
|
||||
for fichier in fichiers.findall('./fichier'):
|
||||
info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
|
||||
for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]):
|
||||
format_url = fichier.get('url%s' % suffix)
|
||||
if not format_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': stream,
|
||||
'play_path': format_url,
|
||||
'ext': 'flv',
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(concert.get('largeur%s' % suffix)),
|
||||
'height': int_or_none(concert.get('hauteur%s' % suffix)),
|
||||
'quality': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
date, hour = concert.get('date'), concert.get('heure')
|
||||
if date and hour:
|
||||
info_dict['timestamp'] = parse_iso8601(
|
||||
'%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
|
||||
elif date:
|
||||
info_dict['upload_date'] = date
|
||||
|
||||
return info_dict
|
@@ -5,19 +5,33 @@ from .zdf import extract_from_xml_url
|
||||
|
||||
|
||||
class PhoenixIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/content/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.phoenix.de/content/884301',
|
||||
'md5': 'ed249f045256150c92e72dbb70eadec6',
|
||||
'info_dict': {
|
||||
'id': '884301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Michael Krons mit Hans-Werner Sinn',
|
||||
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
|
||||
'upload_date': '20141025',
|
||||
'uploader': 'Im Dialog',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
|
||||
(?:
|
||||
phoenix/die_sendungen/(?:[^/]+/)?
|
||||
)?
|
||||
(?P<id>[0-9]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.phoenix.de/content/884301',
|
||||
'md5': 'ed249f045256150c92e72dbb70eadec6',
|
||||
'info_dict': {
|
||||
'id': '884301',
|
||||
'ext': 'mp4',
|
||||
'title': 'Michael Krons mit Hans-Werner Sinn',
|
||||
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
|
||||
'upload_date': '20141025',
|
||||
'uploader': 'Im Dialog',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@@ -30,7 +30,7 @@ class PladformIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '100183293',
|
||||
'ext': 'mp4',
|
||||
'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть',
|
||||
'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
|
||||
'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'duration': 694,
|
||||
|
@@ -4,85 +4,72 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class PlayFMIE(InfoExtractor):
|
||||
IE_NAME = 'play.fm'
|
||||
_VALID_URL = r'https?://(?:www\.)?play\.fm/[^?#]*(?P<upload_date>[0-9]{8})(?P<id>[0-9]{6})(?:$|[?#])'
|
||||
_VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.play.fm/recording/leipzigelectronicmusicbatofarparis_fr20140712137220',
|
||||
'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12',
|
||||
'md5': 'c505f8307825a245d0c7ad1850001f22',
|
||||
'info_dict': {
|
||||
'id': '137220',
|
||||
'id': '71276',
|
||||
'ext': 'mp3',
|
||||
'title': 'LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
|
||||
'uploader': 'Sven Tasnadi',
|
||||
'uploader_id': 'sventasnadi',
|
||||
'duration': 5627.428,
|
||||
'upload_date': '20140712',
|
||||
'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12',
|
||||
'description': '',
|
||||
'duration': 5627,
|
||||
'timestamp': 1406033781,
|
||||
'upload_date': '20140722',
|
||||
'uploader': 'Dan Drastic',
|
||||
'uploader_id': '71170',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
upload_date = mobj.group('upload_date')
|
||||
slug = mobj.group('slug')
|
||||
|
||||
rec_data = compat_urllib_parse.urlencode({'rec_id': video_id})
|
||||
req = compat_urllib_request.Request(
|
||||
'http://www.play.fm/flexRead/recording', data=rec_data)
|
||||
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
|
||||
rec_doc = self._download_xml(req, video_id)
|
||||
recordings = self._download_json(
|
||||
'http://v2api.play.fm/recordings/slug/%s' % slug, video_id)
|
||||
|
||||
error_node = rec_doc.find('./error')
|
||||
if error_node is not None:
|
||||
raise ExtractorError('An error occured: %s (code %s)' % (
|
||||
error_node.text, rec_doc.find('./status').text))
|
||||
error = recordings.get('error')
|
||||
if isinstance(error, dict):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, error.get('message')),
|
||||
expected=True)
|
||||
|
||||
recording = rec_doc.find('./recording')
|
||||
title = recording.find('./title').text
|
||||
view_count = str_to_int(recording.find('./stats/playcount').text)
|
||||
comment_count = str_to_int(recording.find('./stats/comments').text)
|
||||
duration = float_or_none(recording.find('./duration').text, scale=1000)
|
||||
thumbnail = recording.find('./image').text
|
||||
|
||||
artist = recording.find('./artists/artist')
|
||||
uploader = artist.find('./name').text
|
||||
uploader_id = artist.find('./slug').text
|
||||
|
||||
video_url = '%s//%s/%s/%s/offset/0/sh/%s/rec/%s/jingle/%s/loc/%s' % (
|
||||
'http:', recording.find('./url').text,
|
||||
recording.find('./_class').text, recording.find('./file_id').text,
|
||||
rec_doc.find('./uuid').text, video_id,
|
||||
rec_doc.find('./jingle/file_id').text,
|
||||
'http%3A%2F%2Fwww.play.fm%2Fplayer',
|
||||
)
|
||||
audio_url = recordings['audio']
|
||||
video_id = compat_str(recordings.get('id') or video_id)
|
||||
title = recordings['title']
|
||||
description = recordings.get('description')
|
||||
duration = int_or_none(recordings.get('recordingDuration'))
|
||||
timestamp = parse_iso8601(recordings.get('created_at'))
|
||||
uploader = recordings.get('page', {}).get('title')
|
||||
uploader_id = compat_str(recordings.get('page', {}).get('id'))
|
||||
view_count = int_or_none(recordings.get('playCount'))
|
||||
comment_count = int_or_none(recordings.get('commentCount'))
|
||||
categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp3',
|
||||
'filesize': int_or_none(recording.find('./size').text),
|
||||
'url': audio_url,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'view_count': view_count,
|
||||
'comment_count': comment_count,
|
||||
'categories': categories,
|
||||
}
|
||||
|
@@ -33,10 +33,8 @@ class PornHubIE(InfoExtractor):
|
||||
}
|
||||
|
||||
def _extract_count(self, pattern, webpage, name):
|
||||
count = self._html_search_regex(pattern, webpage, '%s count' % name, fatal=False)
|
||||
if count:
|
||||
count = str_to_int(count)
|
||||
return count
|
||||
return str_to_int(self._search_regex(
|
||||
pattern, webpage, '%s count' % name, fatal=False))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@@ -62,11 +60,14 @@ class PornHubIE(InfoExtractor):
|
||||
if thumbnail:
|
||||
thumbnail = compat_urllib_parse.unquote(thumbnail)
|
||||
|
||||
view_count = self._extract_count(r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||
like_count = self._extract_count(r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||
view_count = self._extract_count(
|
||||
r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
|
||||
like_count = self._extract_count(
|
||||
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
|
||||
dislike_count = self._extract_count(
|
||||
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
|
||||
comment_count = self._extract_count(
|
||||
r'All comments \(<var class="videoCommentCount">([\d,\.]+)</var>', webpage, 'comment')
|
||||
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
|
||||
|
||||
video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage)))
|
||||
if webpage.find('"encrypted":true') != -1:
|
||||
|
96
youtube_dl/extractor/pornovoisines.py
Normal file
96
youtube_dl/extractor/pornovoisines.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class PornoVoisinesIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
|
||||
|
||||
_VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
|
||||
'/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
|
||||
|
||||
_SERVER_NUMBERS = (1, 2)
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
|
||||
'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
|
||||
'info_dict': {
|
||||
'id': '1285',
|
||||
'display_id': 'recherche-appartement',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recherche appartement',
|
||||
'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
|
||||
'thumbnail': 're:^https?://.*\.jpg$',
|
||||
'upload_date': '20140925',
|
||||
'duration': 120,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'categories': ['Débutante', 'Scénario', 'Sodomie'],
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def build_video_url(cls, num):
|
||||
return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = self.build_video_url(video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
|
||||
description = self._html_search_regex(
|
||||
r'<article id="descriptif">(.+?)</article>',
|
||||
webpage, "description", fatal=False, flags=re.DOTALL)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
|
||||
webpage, 'thumbnail', fatal=False)
|
||||
if thumbnail:
|
||||
thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
|
||||
duration = int_or_none(self._search_regex(
|
||||
'Durée (\d+)', webpage, 'duration', fatal=False))
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'(\d+) vues', webpage, 'view count', fatal=False))
|
||||
average_rating = self._search_regex(
|
||||
r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
|
||||
if average_rating:
|
||||
average_rating = float_or_none(average_rating.replace(',', '.'))
|
||||
|
||||
categories = self._html_search_meta(
|
||||
'keywords', webpage, 'categories', fatal=False)
|
||||
if categories:
|
||||
categories = [category.strip() for category in categories.split(',')]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'duration': duration,
|
||||
'view_count': view_count,
|
||||
'average_rating': average_rating,
|
||||
'categories': categories,
|
||||
'age_limit': 18,
|
||||
}
|
69
youtube_dl/extractor/primesharetv.py
Normal file
69
youtube_dl/extractor/primesharetv.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class PrimeShareTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://primeshare.tv/download/238790B611',
|
||||
'md5': 'b92d9bf5461137c36228009f31533fbc',
|
||||
'info_dict': {
|
||||
'id': '238790B611',
|
||||
'ext': 'mp4',
|
||||
'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '>File not exist<' in webpage:
|
||||
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
|
||||
|
||||
fields = dict(re.findall(r'''(?x)<input\s+
|
||||
type="hidden"\s+
|
||||
name="([^"]+)"\s+
|
||||
(?:id="[^"]+"\s+)?
|
||||
value="([^"]*)"
|
||||
''', webpage))
|
||||
|
||||
headers = {
|
||||
'Referer': url,
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}
|
||||
|
||||
wait_time = int(self._search_regex(
|
||||
r'var\s+cWaitTime\s*=\s*(\d+)',
|
||||
webpage, 'wait time', default=7)) + 1
|
||||
self._sleep(wait_time, video_id)
|
||||
|
||||
req = compat_urllib_request.Request(
|
||||
url, compat_urllib_parse.urlencode(fields), headers)
|
||||
video_page = self._download_webpage(
|
||||
req, video_id, 'Downloading video page')
|
||||
|
||||
video_url = self._search_regex(
|
||||
r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
|
||||
video_page, 'video url')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h1>Watch\s*(?: )?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?: )?\s*<strong>',
|
||||
video_page, 'title')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'ext': 'mp4',
|
||||
}
|
@@ -10,6 +10,7 @@ from ..compat import (
|
||||
)
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2104602',
|
||||
'ext': 'mp4',
|
||||
'title': 'Staffel 2, Episode 18 - Jahresrückblick',
|
||||
'title': 'Episode 18 - Staffel 2',
|
||||
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
|
||||
'upload_date': '20131231',
|
||||
'duration': 5845.04,
|
||||
@@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor):
|
||||
urls_sources = urls_sources.values()
|
||||
|
||||
def fix_bitrate(bitrate):
|
||||
bitrate = int_or_none(bitrate)
|
||||
if not bitrate:
|
||||
return None
|
||||
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
for source in urls_sources:
|
||||
|
170
youtube_dl/extractor/qqmusic.py
Normal file
170
youtube_dl/extractor/qqmusic.py
Normal file
@@ -0,0 +1,170 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import time
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
)
|
||||
from ..compat import compat_urllib_request
|
||||
|
||||
|
||||
class QQMusicIE(InfoExtractor):
|
||||
_VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD',
|
||||
'md5': 'bed90b6db2a7a7a7e11bc585f471f63a',
|
||||
'info_dict': {
|
||||
'id': '004295Et37taLD',
|
||||
'ext': 'm4a',
|
||||
'title': '可惜没如果',
|
||||
'upload_date': '20141227',
|
||||
'creator': '林俊杰',
|
||||
'description': 'md5:4348ff1dd24036906baa7b6f973f8d30',
|
||||
}
|
||||
}]
|
||||
|
||||
# Reference: m_r_GetRUin() in top_player.js
|
||||
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
|
||||
@staticmethod
|
||||
def m_r_get_ruin():
|
||||
curMs = int(time.time() * 1000) % 1000
|
||||
return int(round(random.random() * 2147483647) * curMs % 1E10)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
|
||||
detail_info_page = self._download_webpage(
|
||||
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid,
|
||||
mid, note='Download song detail info',
|
||||
errnote='Unable to get song detail info', encoding='gbk')
|
||||
|
||||
song_name = self._html_search_regex(
|
||||
r"songname:\s*'([^']+)'", detail_info_page, 'song name')
|
||||
|
||||
publish_time = self._html_search_regex(
|
||||
r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
|
||||
'publish time', default=None)
|
||||
if publish_time:
|
||||
publish_time = publish_time.replace('-', '')
|
||||
|
||||
singer = self._html_search_regex(
|
||||
r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
|
||||
|
||||
lrc_content = self._html_search_regex(
|
||||
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
|
||||
detail_info_page, 'LRC lyrics', default=None)
|
||||
|
||||
guid = self.m_r_get_ruin()
|
||||
|
||||
vkey = self._download_json(
|
||||
'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid,
|
||||
mid, note='Retrieve vkey', errnote='Unable to get vkey',
|
||||
transform_source=strip_jsonp)['key']
|
||||
song_url = 'http://cc.stream.qqmusic.qq.com/C200%s.m4a?vkey=%s&guid=%s&fromtag=0' % (mid, vkey, guid)
|
||||
|
||||
return {
|
||||
'id': mid,
|
||||
'url': song_url,
|
||||
'title': song_name,
|
||||
'upload_date': publish_time,
|
||||
'creator': singer,
|
||||
'description': lrc_content,
|
||||
}
|
||||
|
||||
|
||||
class QQPlaylistBaseIE(InfoExtractor):
|
||||
@staticmethod
|
||||
def qq_static_url(category, mid):
|
||||
return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid)
|
||||
|
||||
@classmethod
|
||||
def get_entries_from_page(cls, page):
|
||||
entries = []
|
||||
|
||||
for item in re.findall(r'class="data"[^<>]*>([^<>]+)</', page):
|
||||
song_mid = unescapeHTML(item).split('|')[-5]
|
||||
entries.append(cls.url_result(
|
||||
'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic',
|
||||
song_mid))
|
||||
|
||||
return entries
|
||||
|
||||
|
||||
class QQMusicSingerIE(QQPlaylistBaseIE):
|
||||
_VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
_TEST = {
|
||||
'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2',
|
||||
'info_dict': {
|
||||
'id': '001BLpXF2DyJe2',
|
||||
'title': '林俊杰',
|
||||
'description': 'md5:2a222d89ba4455a3af19940c0481bb78',
|
||||
},
|
||||
'playlist_count': 12,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
|
||||
singer_page = self._download_webpage(
|
||||
self.qq_static_url('singer', mid), mid, 'Download singer page')
|
||||
|
||||
entries = self.get_entries_from_page(singer_page)
|
||||
|
||||
singer_name = self._html_search_regex(
|
||||
r"singername\s*:\s*'([^']+)'", singer_page, 'singer name',
|
||||
default=None)
|
||||
|
||||
singer_id = self._html_search_regex(
|
||||
r"singerid\s*:\s*'([0-9]+)'", singer_page, 'singer id',
|
||||
default=None)
|
||||
|
||||
singer_desc = None
|
||||
|
||||
if singer_id:
|
||||
req = compat_urllib_request.Request(
|
||||
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg?utf8=1&outCharset=utf-8&format=xml&singerid=%s' % singer_id)
|
||||
req.add_header(
|
||||
'Referer', 'http://s.plcloud.music.qq.com/xhr_proxy_utf8.html')
|
||||
singer_desc_page = self._download_xml(
|
||||
req, mid, 'Donwload singer description XML')
|
||||
|
||||
singer_desc = singer_desc_page.find('./data/info/desc').text
|
||||
|
||||
return self.playlist_result(entries, mid, singer_name, singer_desc)
|
||||
|
||||
|
||||
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||
_VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0',
|
||||
'info_dict': {
|
||||
'id': '000gXCTb2AhRR1',
|
||||
'title': '我们都是这样长大的',
|
||||
'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6',
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mid = self._match_id(url)
|
||||
|
||||
album_page = self._download_webpage(
|
||||
self.qq_static_url('album', mid), mid, 'Download album page')
|
||||
|
||||
entries = self.get_entries_from_page(album_page)
|
||||
|
||||
album_name = self._html_search_regex(
|
||||
r"albumname\s*:\s*'([^']+)',", album_page, 'album name',
|
||||
default=None)
|
||||
|
||||
album_detail = self._html_search_regex(
|
||||
r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>',
|
||||
album_page, 'album details', default=None)
|
||||
|
||||
return self.playlist_result(entries, mid, album_name, album_detail)
|
67
youtube_dl/extractor/radiojavan.py
Normal file
67
youtube_dl/extractor/radiojavan.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import(
|
||||
unified_strdate,
|
||||
str_to_int,
|
||||
)
|
||||
|
||||
|
||||
class RadioJavanIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
|
||||
_TEST = {
|
||||
'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam',
|
||||
'md5': 'e85208ffa3ca8b83534fca9fe19af95b',
|
||||
'info_dict': {
|
||||
'id': 'chaartaar-ashoobam',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chaartaar - Ashoobam',
|
||||
'thumbnail': 're:^https?://.*\.jpe?g$',
|
||||
'upload_date': '20150215',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
upload_date = unified_strdate(self._search_regex(
|
||||
r'class="date_added">Date added: ([^<]+)<',
|
||||
webpage, 'upload date', fatal=False))
|
||||
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'class="views">Plays: ([\d,]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
like_count = str_to_int(self._search_regex(
|
||||
r'class="rating">([\d,]+) likes',
|
||||
webpage, 'like count', fatal=False))
|
||||
dislike_count = str_to_int(self._search_regex(
|
||||
r'class="rating">([\d,]+) dislikes',
|
||||
webpage, 'dislike count', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'upload_date': upload_date,
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'formats': formats,
|
||||
}
|
@@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class RaiIE(InfoExtractor):
|
||||
_VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
|
||||
_VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
|
||||
@@ -62,34 +62,78 @@ class RaiIE(InfoExtractor):
|
||||
'description': 'Edizione delle ore 20:30 ',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html',
|
||||
'md5': '02b64456f7cc09f96ff14e7dd489017e',
|
||||
'info_dict': {
|
||||
'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6',
|
||||
'ext': 'flv',
|
||||
'title': 'Il Candidato - Primo episodio: "Le Primarie"',
|
||||
'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!',
|
||||
'uploader': 'RaiTre',
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
def _extract_relinker_url(self, webpage):
|
||||
return self._proto_relative_url(self._search_regex(
|
||||
[r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'],
|
||||
webpage, 'relinker url', default=None))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
host = mobj.group('host')
|
||||
|
||||
media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = media.get('name')
|
||||
description = media.get('desc')
|
||||
thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
|
||||
duration = parse_duration(media.get('length'))
|
||||
uploader = media.get('author')
|
||||
upload_date = unified_strdate(media.get('date'))
|
||||
relinker_url = self._extract_relinker_url(webpage)
|
||||
|
||||
formats = []
|
||||
if not relinker_url:
|
||||
iframe_path = self._search_regex(
|
||||
r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"',
|
||||
webpage, 'iframe')
|
||||
webpage = self._download_webpage(
|
||||
'%s/%s' % (host, iframe_path), video_id)
|
||||
relinker_url = self._extract_relinker_url(webpage)
|
||||
|
||||
for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']:
|
||||
media_url = media.get(format_id)
|
||||
if not media_url:
|
||||
continue
|
||||
formats.append({
|
||||
relinker = self._download_json(
|
||||
'%s&output=47' % relinker_url, video_id)
|
||||
|
||||
media_url = relinker['video'][0]
|
||||
ct = relinker.get('ct')
|
||||
if ct == 'f4m':
|
||||
formats = self._extract_f4m_formats(
|
||||
media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id)
|
||||
else:
|
||||
formats = [{
|
||||
'url': media_url,
|
||||
'format_id': format_id,
|
||||
'ext': 'mp4',
|
||||
})
|
||||
'format_id': ct,
|
||||
}]
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, url)
|
||||
json_link = self._html_search_meta(
|
||||
'jsonlink', webpage, 'JSON link', default=None)
|
||||
if json_link:
|
||||
media = self._download_json(
|
||||
host + json_link, video_id, 'Downloading video JSON')
|
||||
title = media.get('name')
|
||||
description = media.get('desc')
|
||||
thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image')
|
||||
duration = parse_duration(media.get('length'))
|
||||
uploader = media.get('author')
|
||||
upload_date = unified_strdate(media.get('date'))
|
||||
else:
|
||||
title = (self._search_regex(
|
||||
r'var\s+videoTitolo\s*=\s*"(.+?)";',
|
||||
webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"')
|
||||
description = self._og_search_description(webpage)
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
duration = None
|
||||
uploader = self._html_search_meta('Editore', webpage, 'uploader')
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'item-date', webpage, 'upload date', default=None))
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@@ -103,8 +147,7 @@ class RaiIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, url):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
subtitles = {}
|
||||
m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
|
||||
if m:
|
||||
|
@@ -1,17 +1,19 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.redtube.com/66418',
|
||||
'md5': '7b8c22b5e7098a3e1c09709df1126d2d',
|
||||
'info_dict': {
|
||||
'id': '66418',
|
||||
'ext': 'mp4',
|
||||
"title": "Sucked on a toilet",
|
||||
"age_limit": 18,
|
||||
'title': 'Sucked on a toilet',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +21,9 @@ class RedTubeIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
|
||||
raise ExtractorError('Video %s has been removed' % video_id, expected=True)
|
||||
|
||||
video_url = self._html_search_regex(
|
||||
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
|
||||
video_title = self._html_search_regex(
|
||||
|
@@ -8,8 +8,10 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
remove_end,
|
||||
std_headers,
|
||||
struct_unpack,
|
||||
)
|
||||
|
||||
@@ -84,13 +86,22 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||
manager_info = self._download_json(
|
||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||
None, 'Fetching manager info')
|
||||
self._manager = manager_info['manager']
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||
video_id)['page']['items'][0]
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
|
||||
if info['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
if not video_url.endswith('.f4m'):
|
||||
@@ -127,6 +138,47 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
for s in subs)
|
||||
|
||||
|
||||
class RTVEInfantilIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:infantil'
|
||||
IE_DESC = 'RTVE infantil'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
||||
'md5': '915319587b33720b8e0357caaa6617e6',
|
||||
'info_dict': {
|
||||
'id': '3040283',
|
||||
'ext': 'mp4',
|
||||
'title': 'Maneras de vivir',
|
||||
'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
|
||||
'duration': 357.958,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||
video_id)['page']['items'][0]
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vidplayer_id = self._search_regex(
|
||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
||||
|
||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
||||
video_url = _decrypt_url(png)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'ext': 'mp4',
|
||||
'title': info['title'],
|
||||
'url': video_url,
|
||||
'thumbnail': info.get('image'),
|
||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
||||
}
|
||||
|
||||
|
||||
class RTVELiveIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
|
157
youtube_dl/extractor/safari.py
Normal file
157
youtube_dl/extractor/safari.py
Normal file
@@ -0,0 +1,157 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveIE
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_parse,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
std_headers,
|
||||
)
|
||||
|
||||
|
||||
class SafariBaseIE(InfoExtractor):
|
||||
_LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
|
||||
_SUCCESSFUL_LOGIN_REGEX = r'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
|
||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to supply credentials for safaribooksonline.com'
|
||||
_NETRC_MACHINE = 'safari'
|
||||
|
||||
_API_BASE = 'https://www.safaribooksonline.com/api/v1/book'
|
||||
_API_FORMAT = 'json'
|
||||
|
||||
LOGGED_IN = False
|
||||
|
||||
def _real_initialize(self):
|
||||
# We only need to log in once for courses or individual videos
|
||||
if not self.LOGGED_IN:
|
||||
self._login()
|
||||
SafariBaseIE.LOGGED_IN = True
|
||||
|
||||
def _login(self):
|
||||
(username, password) = self._get_login_info()
|
||||
if username is None:
|
||||
raise ExtractorError(
|
||||
self._ACCOUNT_CREDENTIALS_HINT,
|
||||
expected=True)
|
||||
|
||||
headers = std_headers
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = self._LOGIN_URL
|
||||
|
||||
login_page = self._download_webpage(
|
||||
self._LOGIN_URL, None,
|
||||
'Downloading login form')
|
||||
|
||||
csrf = self._html_search_regex(
|
||||
r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
|
||||
login_page, 'csrf token')
|
||||
|
||||
login_form = {
|
||||
'csrfmiddlewaretoken': csrf,
|
||||
'email': username,
|
||||
'password1': password,
|
||||
'login': 'Sign In',
|
||||
'next': '',
|
||||
}
|
||||
|
||||
request = compat_urllib_request.Request(
|
||||
self._LOGIN_URL, compat_urllib_parse.urlencode(login_form), headers=headers)
|
||||
login_page = self._download_webpage(
|
||||
request, None, 'Logging in as %s' % username)
|
||||
|
||||
if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
|
||||
raise ExtractorError(
|
||||
'Login failed; make sure your credentials are correct and try again.',
|
||||
expected=True)
|
||||
|
||||
self.to_screen('Login successful')
|
||||
|
||||
|
||||
class SafariIE(SafariBaseIE):
|
||||
IE_NAME = 'safari'
|
||||
IE_DESC = 'safaribooksonline.com online video'
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
(?:www\.)?safaribooksonline\.com/
|
||||
(?:
|
||||
library/view/[^/]+|
|
||||
api/v1/book
|
||||
)/
|
||||
(?P<course_id>\d+)/
|
||||
(?:chapter(?:-content)?/)?
|
||||
(?P<part>part\d+)\.html
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
|
||||
'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
|
||||
'info_dict': {
|
||||
'id': '2842601850001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Introduction',
|
||||
},
|
||||
'skip': 'Requires safaribooksonline account credentials',
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
course_id = mobj.group('course_id')
|
||||
part = mobj.group('part')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
|
||||
part)
|
||||
|
||||
bc_url = BrightcoveIE._extract_brightcove_url(webpage)
|
||||
if not bc_url:
|
||||
raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
|
||||
|
||||
return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove')
|
||||
|
||||
|
||||
class SafariCourseIE(SafariBaseIE):
|
||||
IE_NAME = 'safari:course'
|
||||
IE_DESC = 'safaribooksonline.com online courses'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
|
||||
'info_dict': {
|
||||
'id': '9780133392838',
|
||||
'title': 'Hadoop Fundamentals LiveLessons',
|
||||
},
|
||||
'playlist_count': 22,
|
||||
'skip': 'Requires safaribooksonline account credentials',
|
||||
}, {
|
||||
'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id = self._match_id(url)
|
||||
|
||||
course_json = self._download_json(
|
||||
'%s/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
|
||||
course_id, 'Downloading course JSON')
|
||||
|
||||
if 'chapters' not in course_json:
|
||||
raise ExtractorError(
|
||||
'No chapters found for course %s' % course_id, expected=True)
|
||||
|
||||
entries = [
|
||||
self.url_result(chapter, 'Safari')
|
||||
for chapter in course_json['chapters']]
|
||||
|
||||
course_title = course_json['title']
|
||||
|
||||
return self.playlist_result(entries, course_id, course_title)
|
@@ -81,60 +81,6 @@ class ScreenwaveMediaIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class CinemassacreIE(InfoExtractor):
|
||||
_VALID_URL = 'https?://(?:www\.)?cinemassacre\.com/(?P<date_y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
|
||||
'md5': 'fde81fbafaee331785f58cd6c0d46190',
|
||||
'info_dict': {
|
||||
'id': 'Cinemassacre-19911',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20121110',
|
||||
'title': '“Angry Video Game Nerd: The Movie” – Trailer',
|
||||
'description': 'md5:fb87405fcb42a331742a0dce2708560b',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
|
||||
'md5': 'd72f10cd39eac4215048f62ab477a511',
|
||||
'info_dict': {
|
||||
'id': 'Cinemassacre-521be8ef82b16',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20131002',
|
||||
'title': 'The Mummy’s Hand (1940)',
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
display_id = mobj.group('display_id')
|
||||
video_date = mobj.group('date_y') + mobj.group('date_m') + mobj.group('date_d')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
playerdata_url = self._search_regex(
|
||||
r'src="(http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
|
||||
webpage, 'player data URL')
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(?P<title>.+?)\|', webpage, 'title')
|
||||
video_description = self._html_search_regex(
|
||||
r'<div class="entry-content">(?P<description>.+?)</div>',
|
||||
webpage, 'description', flags=re.DOTALL, fatal=False)
|
||||
video_thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'upload_date': video_date,
|
||||
'thumbnail': video_thumbnail,
|
||||
'url': playerdata_url,
|
||||
}
|
||||
|
||||
|
||||
class TeamFourIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
|
||||
_TEST = {
|
||||
|
141
youtube_dl/extractor/senateisvp.py
Normal file
141
youtube_dl/extractor/senateisvp.py
Normal file
@@ -0,0 +1,141 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
unsmuggle_url,
|
||||
)
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class SenateISVPIE(InfoExtractor):
|
||||
_COMM_MAP = [
|
||||
["ag", "76440", "http://ag-f.akamaihd.net"],
|
||||
["aging", "76442", "http://aging-f.akamaihd.net"],
|
||||
["approps", "76441", "http://approps-f.akamaihd.net"],
|
||||
["armed", "76445", "http://armed-f.akamaihd.net"],
|
||||
["banking", "76446", "http://banking-f.akamaihd.net"],
|
||||
["budget", "76447", "http://budget-f.akamaihd.net"],
|
||||
["cecc", "76486", "http://srs-f.akamaihd.net"],
|
||||
["commerce", "80177", "http://commerce1-f.akamaihd.net"],
|
||||
["csce", "75229", "http://srs-f.akamaihd.net"],
|
||||
["dpc", "76590", "http://dpc-f.akamaihd.net"],
|
||||
["energy", "76448", "http://energy-f.akamaihd.net"],
|
||||
["epw", "76478", "http://epw-f.akamaihd.net"],
|
||||
["ethics", "76449", "http://ethics-f.akamaihd.net"],
|
||||
["finance", "76450", "http://finance-f.akamaihd.net"],
|
||||
["foreign", "76451", "http://foreign-f.akamaihd.net"],
|
||||
["govtaff", "76453", "http://govtaff-f.akamaihd.net"],
|
||||
["help", "76452", "http://help-f.akamaihd.net"],
|
||||
["indian", "76455", "http://indian-f.akamaihd.net"],
|
||||
["intel", "76456", "http://intel-f.akamaihd.net"],
|
||||
["intlnarc", "76457", "http://intlnarc-f.akamaihd.net"],
|
||||
["jccic", "85180", "http://jccic-f.akamaihd.net"],
|
||||
["jec", "76458", "http://jec-f.akamaihd.net"],
|
||||
["judiciary", "76459", "http://judiciary-f.akamaihd.net"],
|
||||
["rpc", "76591", "http://rpc-f.akamaihd.net"],
|
||||
["rules", "76460", "http://rules-f.akamaihd.net"],
|
||||
["saa", "76489", "http://srs-f.akamaihd.net"],
|
||||
["smbiz", "76461", "http://smbiz-f.akamaihd.net"],
|
||||
["srs", "75229", "http://srs-f.akamaihd.net"],
|
||||
["uscc", "76487", "http://srs-f.akamaihd.net"],
|
||||
["vetaff", "76462", "http://vetaff-f.akamaihd.net"],
|
||||
["arch", "", "http://ussenate-f.akamaihd.net/"]
|
||||
]
|
||||
_IE_NAME = 'senate.gov'
|
||||
_VALID_URL = r'http://www\.senate\.gov/isvp/\?(?P<qs>.+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'flv',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'thumbnail': 're:^https?://.*\.(?:jpg|png)$',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||
'info_dict': {
|
||||
'id': 'commerce011514',
|
||||
'ext': 'flv',
|
||||
'title': 'Integrated Senate Video Player'
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||
# checksum differs each time
|
||||
'info_dict': {
|
||||
'id': 'intel090613',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player'
|
||||
}
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _search_iframe_url(webpage):
|
||||
mobj = re.search(
|
||||
r"<iframe[^>]+src=['\"](?P<url>http://www\.senate\.gov/isvp/\?[^'\"]+)['\"]",
|
||||
webpage)
|
||||
if mobj:
|
||||
return mobj.group('url')
|
||||
|
||||
def _get_info_for_comm(self, committee):
|
||||
for entry in self._COMM_MAP:
|
||||
if entry[0] == committee:
|
||||
return entry[1:]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs'))
|
||||
if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = re.sub(r'.mp4$', '', qs['filename'][0])
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if smuggled_data.get('force_title'):
|
||||
title = smuggled_data['force_title']
|
||||
else:
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
|
||||
poster = qs.get('poster')
|
||||
thumbnail = poster[0] if poster else None
|
||||
|
||||
video_type = qs['type'][0]
|
||||
committee = video_type if video_type == 'arch' else qs['comm'][0]
|
||||
stream_num, domain = self._get_info_for_comm(committee)
|
||||
|
||||
formats = []
|
||||
if video_type == 'arch':
|
||||
filename = video_id if '.' in video_id else video_id + '.mp4'
|
||||
formats = [{
|
||||
# All parameters in the query string are necessary to prevent a 403 error
|
||||
'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=',
|
||||
}]
|
||||
else:
|
||||
hdcore_sign = '?hdcore=3.1.0'
|
||||
url_params = (domain, video_id, stream_num)
|
||||
f4m_url = '%s/z/%s_1@%s/manifest.f4m' % url_params + hdcore_sign
|
||||
m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params
|
||||
for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.append(entry)
|
||||
for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
|
||||
mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
|
||||
if mobj:
|
||||
entry['format_id'] += mobj.group('tag')
|
||||
formats.append(entry)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user