Compare commits

..

242 Commits

Author SHA1 Message Date
Sergey M․
27d8e089a2 release 2018.09.01 2018-09-01 18:40:23 +07:00
Sergey M․
7bbc1b189a [ChangeLog] Actualize
[ci skip]
2018-09-01 18:36:18 +07:00
LangerJan
0b87e88453 [ard] Add support for one.ard.de 2018-09-01 16:42:30 +07:00
Gorfiend
4d59db5b90 [niconico] Fix extraction on python3 (closes #17393) 2018-09-01 16:04:45 +07:00
Remita Amine
4627995882 [crunchyroll] limit VRVIE inheritance to CrunchyrollIE 2018-09-01 10:04:10 +01:00
Remita Amine
7f2611cb5b [ard] extract f4m formats 2018-09-01 08:40:38 +01:00
Remita Amine
54a5be4dba [crunchyroll] parse vilos media data(closes #17343) 2018-09-01 08:16:41 +01:00
Philipp Hagemeister
ed6919e737 [ard] beta mediathek: make regexp for JSON more robust 2018-09-01 01:59:13 +02:00
Philipp Hagemeister
2b83da2463 [ard] Better format handling
Skip f4m, doesn't work (yet); correctly extract m3u8, and prefer plain HTTP files.
2018-09-01 00:45:36 +02:00
Philipp Hagemeister
c1a37eb24a [ard] Add support for Beta ARD Mediathek
Thanks to https://blog.fefe.de/?ts=a577685d for pointing out support is missing.
2018-09-01 00:18:17 +02:00
Sergey M․
4991e16c2a [bandcamp] Extract more metadata (closes #13197) 2018-08-31 03:35:55 +07:00
Parmjit Virk
14b7a24c19 [bandcamp] Extract track_number (closes #17266) 2018-08-31 02:32:35 +07:00
Leonardo Taccari
73f3bdbeb4 [internazionale] Fix extraction of non-available-abroad videos 2018-08-31 02:15:46 +07:00
Sergey M․
9e21e6d96b [utils] Improve remote address skipping and add support for python 2.6 (closes #17362) 2018-08-29 01:18:03 +07:00
Andrew Udvare
8959018a5f [utils] Skip remote IP addresses non matching to source address' IP version (closes #13422) 2018-08-29 01:17:53 +07:00
Sergey M․
eebbce5656 release 2018.08.28 2018-08-28 03:10:09 +07:00
Sergey M․
56213aff1d [ChangeLog] Actualize
[ci skip]
2018-08-28 03:07:18 +07:00
Sergey M․
409b9324da [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix) (closes #17361) 2018-08-28 02:20:32 +07:00
Andrew Udvare
02df41354c [bitchute] Fix extraction by pass custom User-Agent 2018-08-27 22:04:56 +07:00
Sergey M․
dd88fd65a5 [webofstories:playlist] Fix extraction (closes #16914) 2018-08-26 21:42:30 +07:00
Sergey M․
287cf7e443 [generic] Remove unused import 2018-08-26 20:09:02 +07:00
Sergey M․
dac6f7654a [tvplayhome] Add extractor (closes #17344) 2018-08-26 20:08:55 +07:00
Sergey M․
e0b6e98871 [generic] Allow relative src for videojs embeds (closes #17324) 2018-08-24 23:12:53 +07:00
Sergey M․
beff09505c [xfileshare] Add support for vidto.se (closes #17317) 2018-08-24 04:00:35 +07:00
Sergey M․
135e6a1c10 [vidzi] Add support for vidzi.nu (closes #17316) 2018-08-24 02:36:56 +07:00
Sergey M․
c707d2067d [nova:embed] Add extractor (closes #17282) 2018-08-22 23:18:07 +07:00
Sergey M․
4c86163b60 release 2018.08.22 2018-08-22 02:32:18 +07:00
Sergey M․
b662273989 [ChangeLog] Actualize
[ci skip]
2018-08-22 02:28:25 +07:00
Sergey M․
df4d817bc3 [kinopoisk] Add extractor (closes #17283) 2018-08-22 02:19:30 +07:00
Sergey M․
db192b2932 [yourporn] Add extractor (closes #17298) 2018-08-22 01:44:22 +07:00
Sergey M․
52007de8ca [go] Add support for disneynow.go.com (closes #16299, closes #17264) 2018-08-22 01:14:47 +07:00
hmlinaric
28f96cf407 [6play] Add support for play.rtl.hr 2018-08-22 00:06:27 +07:00
Sergey M․
eda86b4335 [anvato] Fallback to generic API key for access key to API key lookup (closes #16788, closes #17254) 2018-08-21 23:45:18 +07:00
Sergey M․
bf1245d236 [lci] Fix extraction (closes #17274) 2018-08-20 02:15:48 +07:00
Sergey M․
6f356cbbcf [bbccouk] Extend _ID_REGEX (closes #17270) 2018-08-20 02:05:07 +07:00
Remita Amine
0a74b45191 [cwtv] fix extraction(closes #17256) 2018-08-17 11:59:49 +01:00
Sergey M․
d6ef8b4dd4 [nova] Fix extraction (closes #17241) 2018-08-16 00:11:41 +07:00
Sergey M․
60c0856223 [utils] Use pure browser header for User-Agent (closes #17236) 2018-08-14 23:27:12 +07:00
Sergey M․
57c68ec4c3 [generic] Add support for expressen embeds 2018-08-14 22:51:44 +07:00
Sergey M․
24e0cd709f [raywenderlich] Adapt to site redesign (closes #17225) 2018-08-13 00:15:59 +07:00
Remita Amine
4779420ce8 [redbulltv] add support redbull.com tv URLs(closes #17218) 2018-08-12 05:31:34 +01:00
Sergey M․
de4c41b437 [bitchute] Improve page offset 2018-08-12 01:52:50 +07:00
Sergey M․
b65e3b0636 [bitchute] Add extractor (closes #14052) 2018-08-12 01:47:10 +07:00
Sergey M․
d37dc6e1c9 [clyp] Add support for token protected media (closes #17184) 2018-08-07 23:27:08 +07:00
Sergey M․
a62460aa21 [imdb] Fix extension extraction (closes #17167) 2018-08-06 04:37:03 +07:00
Stanny Nuytkens
d588d4a5a6 [.gitignore] Add .vscode 2018-08-05 22:10:01 +07:00
Sergey M․
81cc22bab6 release 2018.08.04 2018-08-04 01:23:24 +07:00
Sergey M․
20f96f64bd [ChangeLog] Actualize
[ci skip]
2018-08-04 01:21:23 +07:00
Sergey M․
af322eb830 [funk:channel] Improve byChannelAlias extraction (closes #17142) 2018-08-04 00:26:58 +07:00
Sergey M․
cb1c3a3c07 [twitch] Update cliend id and modernize (closes #17126) 2018-08-03 22:44:31 +07:00
Tim Broder
48afc6ca3e [twitch] Fix authentication (closes #17024) 2018-08-03 22:43:23 +07:00
Sergey M․
644921b372 [twitch:vod] Improve _VALID_URL (closes #17135) 2018-08-02 23:16:15 +07:00
Sergey M․
19b9de13c4 [watchbox] Fix extraction (closes #17107) 2018-07-30 23:28:44 +07:00
Sergey M․
6f2d82a5a0 [pbs] Fix extraction (closes #17109) 2018-07-30 23:10:40 +07:00
Giuseppe Fabiano
7ff129d3ea [theplatform] Relax _VALID_URL (closes #16181) 2018-07-30 03:15:06 +07:00
Sergey M․
9d1b213845 [viqeo] Add extractor (closes #17066) 2018-07-30 03:05:36 +07:00
Sergey M․
5484828418 release 2018.07.29 2018-07-29 07:02:18 +07:00
Sergey M․
4eecef84f3 [ChangeLog] Actualize
[ci skip]
2018-07-29 06:59:39 +07:00
bato3
b2286f8fb2 [crunchyroll:playlist] Restrict _VALID_URL (closes #17069) 2018-07-29 06:56:52 +07:00
Giuseppe Fabiano
4938c8d573 [pornhub] Add support for subtitles (closes #16924) 2018-07-29 06:24:10 +07:00
bato3
1a88fc5a69 [ceskatelevize] Use https for API call (refs #16997) 2018-07-29 06:04:59 +07:00
Huyuumi
38e87f6c2a [utils] Remove return from __init__ 2018-07-29 05:52:42 +07:00
Remita Amine
ec240a4369 [dailymotion:playlist] fix extraction(closes #16894) 2018-07-28 20:30:44 +01:00
Sergey M․
cd3a3ff93b [ted] Improve extraction and update tests 2018-07-28 22:09:53 +07:00
Sergey M․
9a984265b9 [ted] Fix extraction for videos without nativeDownloads (closes #16756, closes #17085) 2018-07-28 21:26:23 +07:00
Remita Amine
a098c99f0d [telecinco] fix extraction(closes #17080) 2018-07-28 06:55:42 +01:00
Remita Amine
8e37a7e4cc [mitele] reduce number of requests and update tests 2018-07-28 06:55:42 +01:00
Sidney de Koning
722f1a0f8f [README.md] Actualize Firefox cookie export add-on
Previous one does not work with newer Firefox versions
2018-07-28 00:18:41 +07:00
Remita Amine
0c7b4f49eb [rai] return non http relinker URL intact(closes #17055) 2018-07-26 08:11:06 +01:00
Remita Amine
ad1bc71a8a [vk] fix extraction for inline only videos(fixes #16923) 2018-07-26 07:25:07 +01:00
Sergey M․
b5dec62ca6 [streamcloud] Fix extraction (closes #17054) 2018-07-25 23:07:12 +07:00
Remita Amine
631f93ee2d [facebook] fix tahoe request for authenticated users(closes #16655) 2018-07-23 06:20:18 +01:00
Sergey M․
d4e7065111 Credit @Kerruba for #16328 2018-07-22 21:36:58 +07:00
Sergey M․
234a85858c Credit @tmsbrg for #15462 2018-07-22 21:35:38 +07:00
Sergey M․
a789d1cc90 Credit @nathanrossi for #16554 2018-07-22 21:34:34 +07:00
Sergey M․
694079dff7 Credit @mrfade for #16269 and #16271 2018-07-22 21:31:46 +07:00
Sergey M․
d94fb1225e Credit @dnet for #16174 2018-07-22 21:29:25 +07:00
Sergey M․
7930f91494 Credit @haasn for #16326 2018-07-22 21:27:28 +07:00
Sergey M․
a702056fbe Credit @bastiandg for #16189 2018-07-22 21:26:12 +07:00
Sergey M․
8fd2a7be37 [puhutv] Improve extraction (closes #16269) 2018-07-22 20:36:48 +07:00
Enes
6de82b4476 [puhutv] Add extractor (closes #16010) 2018-07-22 20:36:48 +07:00
Sergey M․
8e66ffc3b7 release 2018.07.21 2018-07-21 21:00:18 +07:00
Sergey M․
6f27998e75 [ChangeLog] Actualize
[ci skip]
2018-07-21 20:58:30 +07:00
Sergey M․
3052a30d42 Improve URL extraction 2018-07-21 19:08:28 +07:00
Sergey M․
4ecf300d13 [iwara] Improve extraction 2018-07-21 18:03:58 +07:00
Sergey M․
af03000ad5 [utils] Introduce url_or_none 2018-07-21 18:03:58 +07:00
Remita Amine
b96b4be461 [bbc] add support for BBC Radio Play pages(closes #17022) 2018-07-21 11:50:14 +01:00
Kazuma Takahara
edb0e17188 [iwara] Fix download URLs (closes #17026) 2018-07-21 17:41:33 +07:00
Sergey M․
e9c671d5e8 [utils] Allow JSONP with empty func name (closes #17028) 2018-07-21 12:30:18 +07:00
Sergey M․
fd62b36680 [vrtnu] Relax title extraction and extract JSON-LD (closes #17018) 2018-07-20 02:39:20 +07:00
Sergey M․
25586c601c [theplatform] PEP 8
[ci skip]
2018-07-20 00:49:14 +07:00
Sergey M․
ecb6b6ae2d [viu] Pass area id 2018-07-20 00:46:50 +07:00
Sergey M․
c258570edd [viu] Pass Referer and Origin headers (closes #16992) 2018-07-20 00:01:43 +07:00
Sergey M․
6fc09f0155 [vimeo] Add another config regex (closes #17013) 2018-07-19 23:15:27 +07:00
Sergey M․
11330f5121 [facebook] Extract view count and update tests (closes #16942) 2018-07-19 02:26:05 +07:00
bato3
8da17f9680 [dailymotion] Improve description extraction (closes #16984) 2018-07-19 02:04:05 +07:00
Sergey M․
c63f5fb863 [slutload] Fix and improve extraction (closes #17001) 2018-07-19 01:59:00 +07:00
Remita Amine
38f1eb0ac3 [mediaset] fix extraction(closes #16977) 2018-07-18 18:34:04 +01:00
Remita Amine
371dcc1dd4 [theplatform] add support for theplatform Top-level domain customization(#16977) 2018-07-18 18:34:04 +01:00
Remita Amine
bd21ead2a2 [extractor/common] add support for DASH and MSS formats extraction in SMIL manifests 2018-07-18 18:34:04 +01:00
Jakub Wilk
905eef2b06 [imgur] Allow digits in filename extension 2018-07-18 23:47:26 +07:00
Sergey M․
79367a9820 [pornhub] Improve extraction and extract all formats (closes #12166, closes #15891, closes #16262, closes #16959) 2018-07-14 18:05:06 +07:00
Sergey M․
40a051fa9f release 2018.07.10 2018-07-10 02:09:51 +07:00
Sergey M․
7e8e948cf7 [ChangeLog] Actualize
[ci skip]
2018-07-10 02:08:15 +07:00
Sergey M․
4b3ee09886 [nrktv] Add support for new season and serie URL schema 2018-07-10 00:27:24 +07:00
Sergey M․
79fd7320e2 [nrktv] Add support for new episode URL schema (closes #16909) 2018-07-09 23:44:05 +07:00
Sergey M․
0685d9727b [utils] Share JSON-LD regex 2018-07-09 23:43:05 +07:00
Sergey M․
e06632e3fe [downloader/dash] Improve error handling (#16927) 2018-07-08 08:22:56 +07:00
Sergey M․
69fcdb845b [frontendmasters] Fix issues and improve extraction (closes #3661, closes #16328) 2018-07-08 00:55:49 +07:00
Luca Cherubin
6868d272e5 [frontendmasters] Add extractor 2018-07-08 00:49:15 +07:00
Sergey M․
4742150788 [funk] Fix extraction (closes #16918) 2018-07-06 23:50:02 +07:00
Aaron Brager
4e71dfd819 [README.md] Rename OS X to macOS 2018-07-05 22:17:18 +07:00
Sergey M․
1ed0b2f74d [watchbox] Fix extraction (closes #16904) 2018-07-05 02:22:15 +07:00
Sergey M․
e15141adae [dplayit] Sort formats 2018-07-05 02:14:50 +07:00
Sergey M․
94fef94d9c [dplayit] Fix extraction (closes #16901) 2018-07-05 02:14:06 +07:00
Sergey M․
9a6628aaf9 [youtube] Improve login error handling (closes #13822) 2018-07-05 00:37:32 +07:00
Sergey M․
689af4960e release 2018.07.04 2018-07-04 04:59:21 +07:00
Sergey M․
d5de0f21b9 [ChangeLog] Actualize
[ci skip]
2018-07-04 04:57:17 +07:00
Sergey M․
24d26ab380 [lynda] PEP 8 2018-07-04 04:49:03 +07:00
Sergey M․
836ef4840f [pluralsight] Switch to graphql (closes #16889, closes #16899) 2018-07-04 04:48:40 +07:00
Sergey M․
5621c3222e [lynda] Simplify login and improve error capturing (#16891) 2018-07-03 02:47:09 +07:00
Remita Amine
db5debf313 [go90] add support for embed urls(closes #16873) 2018-07-01 22:41:32 +01:00
Remita Amine
8cee692b8b [go90] detect geo restriction error and pass geo verification headers(closes #16874) 2018-07-01 22:41:32 +01:00
coreynicholson
973b6ceebb [vlive] Fix live streams extraction 2018-07-01 21:19:17 +07:00
Sergey M․
eca1f0d115 [extractor/common] Properly escape % in MPD templates (closes #16867) 2018-07-01 02:11:36 +07:00
Sergey M․
2160768a21 [npo] Fix typo (closes #16872) 2018-06-30 23:39:56 +07:00
Sergey M․
267d81962a [mediaset] Fix issues and extract all formats (closes #16568) 2018-06-30 02:19:02 +07:00
Timendum
9cf648c92b [mediaset] Add support for new videos 2018-06-30 02:17:51 +07:00
Sergey M․
5e8e2fa51f [extractor/common] Use source URL as Referer for HTML5 entries (closes #16849) 2018-06-29 01:25:05 +07:00
Sergey M․
d4a24f4091 Prefer ffmpeg over avconv by default (closes #8622) 2018-06-29 01:09:14 +07:00
Sergey M․
acbd0ff5df [dctptv] Restore extraction based on REST API (closes #16850) 2018-06-29 00:35:05 +07:00
Sergey M․
7b393f9cc5 [svt] Improve extraction and add support for pages (closes #16802) 2018-06-28 04:29:11 +07:00
Sergey M․
c3bcd206eb [porncom] Fix extraction (closes #16808) 2018-06-26 00:01:06 +07:00
Sergey M․
1f6cc5807e release 2018.06.25 2018-06-25 02:26:02 +07:00
Sergey M․
c306f076ec [ChangeLog] Actualize
[ci skip]
2018-06-25 02:17:14 +07:00
Sergey M․
a0949fec08 [joj] Relax _VALID_URL (closes #16771) 2018-06-24 23:57:22 +07:00
Remita Amine
74caf528bc [brightcove] workaround sonyliv DRM protected videos(closes #16807) 2018-06-24 12:02:32 +01:00
Sergey M․
9fb62e35f6 [motherless:group] Fix _VALID_URL 2018-06-21 23:39:13 +07:00
Sergey M․
b71cc71910 [motherless] Fix extraction (closes #16786) 2018-06-21 23:38:32 +07:00
Sergey M․
a4ec45179e [itv] Sort imports 2018-06-21 23:12:40 +07:00
Sergey M․
30374f4d40 [itv] Make SOAP request non fatal and extract metadata from a webpage (closes #16780) 2018-06-21 23:06:58 +07:00
Sergey M․
91aa502d91 [foxnews:insider] Remove extractor (#15810)
Now covered by foxnews:article
2018-06-20 23:59:37 +07:00
Sergey M․
f51f526b0a [foxnews] Add support for iframe embeds (closes #15810, closes #16711) 2018-06-20 23:53:37 +07:00
Sergey M․
c9b983ff82 release 2018.06.19 2018-06-19 23:16:04 +07:00
Sergey M․
e730508827 [ChangeLog] Actualize
[ci skip]
2018-06-19 23:12:53 +07:00
Sergey M․
8b4b400aef [peertube] Improve generic support (closes #16733) 2018-06-19 23:00:36 +07:00
Remita Amine
e12b4b8bcc [6play] use geo verfication headers 2018-06-19 10:35:57 +01:00
Remita Amine
18806e3b6b [rtbf] fix extraction for python 3.2 and older 2018-06-18 19:10:43 +01:00
Sergey M․
713afa705c [vgtv] Improve HLS formats extraction 2018-06-18 23:15:38 +07:00
Sergey M․
721a877d2f [vgtv] Add support for www.aftonbladet.se/tv/ URLs 2018-06-18 23:08:35 +07:00
Sergey M․
9283d4ea03 [bbccouk] Use expected_status 2018-06-18 04:54:59 +07:00
Sergey M․
00a429bea3 [markiza] Expect 500 status code 2018-06-18 04:54:52 +07:00
Sergey M․
d391b7e23d [extractor/common] Introduce expected_status for convenient accept of failed HTTP requests
Useful when some non-success (2xx) HTTP status codes should be considered normal. Previously this required to manually catch corresponding exceptions and read the response.
2018-06-18 04:54:08 +07:00
Sergey M․
075a13d3e9 [compat] Introduce compat_integer_types 2018-06-18 04:52:58 +07:00
Remita Amine
8ba84e4600 [tvnow] try all clear manifest urls(closes #15361) 2018-06-17 20:41:09 +01:00
Sergey M․
858cf4dc29 release 2018.06.18 2018-06-18 01:34:36 +07:00
Sergey M․
9e761fe6f5 [ChangeLog] Actualize
[ci skip]
2018-06-18 01:31:49 +07:00
Sergey M․
ce0edda0f9 [markiza] Add extractors (closes #16750) 2018-06-18 01:17:47 +07:00
Remita Amine
0adf213d8c [wat] try all supported adaptive urls 2018-06-17 15:56:52 +01:00
Remita Amine
8b183bd5f8 [tf1] try all supported adaptive urls 2018-06-17 15:53:29 +01:00
Remita Amine
1882511754 [6play] add support for rtlplay.be and extract hd usp formats 2018-06-17 12:01:14 +01:00
Remita Amine
764cd4e6f3 [rtbf] improve extraction
- add support for audio and live streams(closes #11923)(closes #9638)
- extract HLS, DASH and all HTTP formats
- extract subtitles
- fixup specific http urls(fixes #16101)
2018-06-17 03:13:41 +01:00
Sergey M․
734d461ca0 [expressen] Add extractor 2018-06-16 21:15:06 +07:00
Urgau
81c5df4f2c [vidzi] Fix extraction (closes #16678) 2018-06-16 05:08:44 +07:00
Sergey M․
87f89dacdd [pbs] Improve extraction (closes #16623, closes #16684) 2018-06-16 02:55:20 +07:00
Sergey M․
9b0b627534 [downloader/rtmp] Fix downloading in verbose mode (closes #16736) 2018-06-15 02:59:15 +07:00
Sergey M․
61cb66830f [bilibili] Restrict cid regex (closes #16638, closes #16734) 2018-06-14 22:40:30 +07:00
Sergey M․
c797db4a2f release 2018.06.14 2018-06-14 01:24:53 +07:00
Sergey M․
03eef0f032 [ChangeLog] Actualize
[ci skip]
2018-06-14 01:22:42 +07:00
Remita Amine
aa56061627 [discoverynetworks] Add support for disco-api videos(closes #16724) 2018-06-13 16:46:59 +01:00
Remita Amine
18d66f0410 [dailymotion] use compat_struct_pack 2018-06-13 15:12:42 +01:00
Remita Amine
f15f7a674b [dailymotion] add support for password protected videos(closes #9789) 2018-06-13 14:51:19 +01:00
Sergey M․
9aca7fe6a3 [abc:iview] Extract more series metadata 2018-06-12 20:25:50 +07:00
Remita Amine
e0671819e7 [abc] fix ABC IView extraction and add support for livestreams(closes #16704)(closes #12354) 2018-06-12 13:07:57 +01:00
Sergey M․
5d6c81b63f [downloader/http] Fix resume when writing ot stdout (closes #16699) 2018-06-12 03:12:29 +07:00
Sergey M․
dc53c78634 [crackle] Add support for sonycrackle.com (closes #16698) 2018-06-12 02:06:30 +07:00
Sergey M․
7dc9c60b4b [tvnet] Fix _VALID_URL 2018-06-12 02:05:58 +07:00
Sergey M․
e51752754d [tvnet] Improve video id extraction 2018-06-12 01:50:43 +07:00
Sergey M․
0645be49cb [inc] PEP 8 2018-06-12 01:41:23 +07:00
Sergey M․
a572ae6114 [tvnet] Improve and fix issues (closes #15462) 2018-06-12 01:37:34 +07:00
Thomas van der Berg
b2df66aeca [tvnet] Add extractor 2018-06-12 01:37:29 +07:00
Sergey M․
93cffb1444 [nrk] Update API hosts and try all previously known ones (closes #16690) 2018-06-11 03:08:36 +07:00
Sergey M․
d253df2f65 [wimp] Fix Youtube embeds extraction 2018-06-11 02:40:17 +07:00
Sergey M․
e8c6afc168 release 2018.06.11 2018-06-11 01:57:30 +07:00
Sergey M․
cc37cc3f99 [ChangeLog] Actualize
[ci skip]
2018-06-11 01:55:16 +07:00
Sergey M․
9d581efe05 [npo] Extend _VALID_URL (closes #16682) 2018-06-10 00:26:16 +07:00
Sergey M․
ff2e486221 [inc] Add support for another embed schema (closes #16666) 2018-06-09 02:53:04 +07:00
Remita Amine
6ae36035d9 [tv4] fix format extraction(closes #16650) 2018-06-06 00:41:08 +01:00
Remita Amine
9afd74d705 [nexx] extract free cdn http formats 2018-06-05 01:02:46 +01:00
Sergey M․
2e6975306a [nexx] Update tests 2018-06-05 02:59:25 +07:00
Sergey M․
06ea7bdd99 [nexx] Add support for free cdn (closes #16538) 2018-06-05 02:55:54 +07:00
Sergey M․
d7be705308 [pbs] Add another cove id pattern (closes #15373) 2018-06-05 00:17:26 +07:00
Sergey M․
2e190c2ad9 [rbmaradio] Add support for 192k format (closes #16631) 2018-06-04 23:51:25 +07:00
Sergey M․
94418c8eb3 release 2018.06.04 2018-06-04 02:41:53 +07:00
Sergey M․
f7560859a3 [devscripts/update-copyright] Update copyright year 2018-06-04 02:33:54 +07:00
Sergey M․
c6c478f40d [ChangeLog] Actualize
[ci skip]
2018-06-04 02:16:33 +07:00
Sergey M․
c3023e9f2e [camtube] Add extractor 2018-06-03 17:09:20 +07:00
Sergey M․
77053237c5 [twitter:card] Generalize base API URL 2018-06-03 15:58:12 +07:00
Sergey M․
b6b2ccb72f [twitter:card] Extract guest token (closes #16609) 2018-06-03 15:57:45 +07:00
Sergey M․
0a10f50e2f [chaturbate] Use geo verification headers 2018-06-03 04:30:33 +07:00
Sergey M․
6d155707e6 [bbc] Add support for bbcthree (closes #16612) 2018-06-03 04:07:59 +07:00
Sergey M․
eb6793ba97 [youtube] Update tests 2018-06-03 02:23:45 +07:00
Sergey M․
7e72694b5e [youtube] Move metadata extraction after video availability check 2018-06-03 02:08:38 +07:00
Sergey M․
936784b272 [youtube] Extract track and artist 2018-06-03 02:05:14 +07:00
Sergey M․
003fe73ccf [safari] Add support for new URL schema (closes #16614) 2018-06-03 00:53:11 +07:00
Remita Amine
1ea559c445 [adn] fix extraction 2018-06-02 18:14:22 +01:00
Sergey M․
19e42ead9b release 2018.06.02 2018-06-02 01:51:31 +07:00
Sergey M․
73c938e460 [ChangeLog] Actualize
[ci skip]
2018-06-02 01:49:48 +07:00
Sergey M․
9b89daefa6 [facebook] Improve extraction (closes #16554) 2018-06-02 01:42:05 +07:00
Nathan Rossi
9d082e7cb8 [facebook] Add support for tahoe player videos (closes #15441)
Specific videos appear to use a newer/different player, this requires a
second request for the video data as the initial request is missing the
specified data.

Additionally these videos have different page content for the uploader
value, which is stored in the `<meta property="og:title"...>` element of
the initial request.
2018-06-02 01:32:53 +07:00
Sergey M․
f20f636596 [cbc] Improve extraction (closes #16583, closes #16593) 2018-06-02 00:35:07 +07:00
Logan Fleur
b995043ab8 Ignore venv directory 2018-06-02 00:18:57 +07:00
Enes
85750f8972 [openload] Improve ext extraction 2018-06-02 00:16:22 +07:00
Sergey M․
926d97fc6b [9c9media] PEP 8 2018-06-01 05:17:49 +07:00
Sergey M․
2593725a9b [twitter:card] Add support for another endpoint (closes #16586) 2018-06-01 05:16:00 +07:00
DroidFreak32
0bfdcc1495 [openload] Add support for oload.win and oload.download 2018-05-31 22:01:44 +07:00
Remita Amine
c3f75e2454 [audimedia] fix extraction(closes #15309) 2018-05-31 12:39:45 +01:00
Remita Amine
3a8e3730c1 [francetv] add support for sport.francetvinfo.fr(closes #15645) 2018-05-31 11:40:37 +01:00
Remita Amine
acca2ac7f3 [mlb] improve extraction(closes #16587) 2018-05-31 02:50:14 +01:00
Remita Amine
128b58ad13 [nhl] remove old extractors 2018-05-31 02:49:35 +01:00
Remita Amine
4fd1437d9d [rbmaradio] check formats availability(closes #16585) 2018-05-30 17:08:32 +01:00
Sergey M․
e425710554 release 2018.05.30 2018-05-30 21:54:30 +07:00
Sergey M․
bc3143ac5e [ChangeLog] Actualize
[ci skip]
2018-05-30 21:52:33 +07:00
Remita Amine
e0d42dd4b2 [teamcoco] Fix extraction for full episodes(closes #16573) 2018-05-30 13:21:07 +01:00
Remita Amine
a07879d6b2 [spiegel] fix info extraction(#16538) 2018-05-28 00:10:46 +01:00
Sergey M․
cfd7f2a636 [apa] Add extractor (closes #15041, closes #15672) 2018-05-27 18:24:54 +07:00
Remita Amine
9c65c4a6cd [bellmedia] add support for bnnbloomberg.ca(#16560) 2018-05-27 12:11:53 +01:00
Remita Amine
c9e12a618c [9c9media] extract mpd formats and subtitles 2018-05-27 12:10:12 +01:00
Sergey M․
8882840ec5 [cammodels] Use geo verification headers 2018-05-26 22:22:58 +07:00
Sergey M․
2ce35d9f43 [cammodels] Add another error pattern 2018-05-26 22:22:58 +07:00
Sergey M․
f16f48779c [downloader/rtmp] Generalize download messages and report time elapsed on finish 2018-05-26 22:22:58 +07:00
Sergey M․
ddd8486a44 [downloader/rtmp] Gracefully handle live streams interrupted by user 2018-05-26 22:22:58 +07:00
Remita Amine
68217024e8 remove unnecessary assignment parenthesis 2018-05-26 16:13:54 +01:00
Remita Amine
ec2f3d2800 [ufctv] add support for authentication(closes #16542) 2018-05-26 16:13:54 +01:00
Sergey M․
8b1da46e8f [cammodels] Improve and simplify (closes #14499) 2018-05-26 21:25:30 +07:00
mars67857
2a49d01992 [cammodels] Add extractor 2018-05-26 21:25:21 +07:00
Remita Amine
261f47306c [utils] fix style id extraction for namespaced id attribute(closes #16551) 2018-05-26 14:38:24 +01:00
Remita Amine
c0fd20abca [soundcloud] detect format extension(closes #16549) 2018-05-26 14:38:24 +01:00
Parmjit Virk
986c0b0215 [cbc] Fix playlist title extraction (closes #16502) 2018-05-26 20:05:54 +07:00
Sergey M․
97b01144bd [tumblr] Detect and report sensitive media (closes #13829) 2018-05-26 20:00:00 +07:00
Sergey M․
56cd31f320 [tumblr] Improve authentication (closes #15133) 2018-05-26 19:59:35 +07:00
Zack Fernandes
c678192af3 [tumblr] Add support for authentication 2018-05-26 19:56:01 +07:00
176 changed files with 6179 additions and 2513 deletions

View File

@@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.05.26*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.05.26**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.01**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.05.26
[debug] youtube-dl version 2018.09.01
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

4
.gitignore vendored
View File

@@ -47,3 +47,7 @@ youtube-dl.zsh
*.iml
tmp/
venv/
# VS Code related files
.vscode

View File

@@ -239,3 +239,10 @@ Martin Weinelt
Surya Oktafendri
TingPing
Alexandre Macabies
Bastian de Groot
Niklas Haas
András Veres-Szentkirályi
Enes Solak
Nathan Rossi
Thomas van der Berg
Luca Cherubin

275
ChangeLog
View File

@@ -1,3 +1,278 @@
version 2018.09.01
Core
* [utils] Skip remote IP addresses non matching to source address' IP version
when creating a connection (#13422, #17362)
Extractors
+ [ard] Add support for one.ard.de (#17397)
* [niconico] Fix extraction on python3 (#17393, #17407)
* [ard] Extract f4m formats
* [crunchyroll] Parse vilos media data (#17343)
+ [ard] Add support for Beta ARD Mediathek
+ [bandcamp] Extract more metadata (#13197)
* [internazionale] Fix extraction of non-available-abroad videos (#17386)
version 2018.08.28
Extractors
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
(#17361)
* [bitchute] Fix extraction by pass custom User-Agent (#17360)
* [webofstories:playlist] Fix extraction (#16914)
+ [tvplayhome] Add support for new tvplay URLs (#17344)
+ [generic] Allow relative src for videojs embeds (#17324)
+ [xfileshare] Add support for vidto.se (#17317)
+ [vidzi] Add support for vidzi.nu (#17316)
+ [nova:embed] Add support for media.cms.nova.cz (#17282)
version 2018.08.22
Core
* [utils] Use pure browser header for User-Agent (#17236)
Extractors
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
+ [yourporn] Add support for yourporn.sexy (#17298)
+ [go] Add support for disneynow.go.com (#16299, #17264)
+ [6play] Add support for play.rtl.hr (#17249)
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
(#16788, #17254)
* [lci] Fix extraction (#17274)
* [bbccouk] Extend id URL regular expression (#17270)
* [cwtv] Fix extraction (#17256)
* [nova] Fix extraction (#17241)
+ [generic] Add support for expressen embeds
* [raywenderlich] Adapt to site redesign (#17225)
+ [redbulltv] Add support redbull.com tv URLs (#17218)
+ [bitchute] Add support for bitchute.com (#14052)
+ [clyp] Add support for token protected media (#17184)
* [imdb] Fix extension extraction (#17167)
version 2018.08.04
Extractors
* [funk:channel] Improve byChannelAlias extraction (#17142)
* [twitch] Fix authentication (#17024, #17126)
* [twitch:vod] Improve URL regular expression (#17135)
* [watchbox] Fix extraction (#17107)
* [pbs] Fix extraction (#17109)
* [theplatform] Relax URL regular expression (#16181, #17097)
+ [viqeo] Add support for viqeo.tv (#17066)
version 2018.07.29
Extractors
* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076)
+ [pornhub] Add support for subtitles (#16924, #17088)
* [ceskatelevize] Use https for API call (#16997, #16999)
* [dailymotion:playlist] Fix extraction (#16894)
* [ted] Improve extraction
* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085)
* [telecinco] Fix extraction (#17080)
* [mitele] Reduce number of requests
* [rai] Return non HTTP relinker URL intact (#17055)
* [vk] Fix extraction for inline only videos (#16923)
* [streamcloud] Fix extraction (#17054)
* [facebook] Fix tahoe player extraction with authentication (#16655)
+ [puhutv] Add support for puhutv.com (#12712, #16010, #16269)
version 2018.07.21
Core
+ [utils] Introduce url_or_none
* [utils] Allow JSONP without function name (#17028)
+ [extractor/common] Extract DASH and MSS formats from SMIL manifests
Extractors
+ [bbc] Add support for BBC Radio Play pages (#17022)
* [iwara] Fix download URLs (#17026)
* [vrtnu] Relax title extraction and extract JSON-LD (#17018)
+ [viu] Pass Referer and Origin headers and area id (#16992)
+ [vimeo] Add another config regular expression (#17013)
+ [facebook] Extract view count (#16942)
* [dailymotion] Improve description extraction (#16984)
* [slutload] Fix and improve extraction (#17001)
* [mediaset] Fix extraction (#16977)
+ [theplatform] Add support for theplatform TLD customization (#16977)
* [imgur] Relax URL regular expression (#16987)
* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262,
#16959)
version 2018.07.10
Core
* [utils] Share JSON-LD regular expression
* [downloader/dash] Improve error handling (#16927)
Extractors
+ [nrktv] Add support for new season and serie URL schema
+ [nrktv] Add support for new episode URL schema (#16909)
+ [frontendmasters] Add support for frontendmasters.com (#3661, #16328)
* [funk] Fix extraction (#16918)
* [watchbox] Fix extraction (#16904)
* [dplayit] Sort formats
* [dplayit] Fix extraction (#16901)
* [youtube] Improve login error handling (#13822)
version 2018.07.04
Core
* [extractor/common] Properly escape % in MPD templates (#16867)
* [extractor/common] Use source URL as Referer for HTML5 entries (16849)
* Prefer ffmpeg over avconv by default (#8622)
Extractors
* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899)
* [lynda] Simplify login and improve error capturing (#16891)
+ [go90] Add support for embed URLs (#16873)
* [go90] Detect geo restriction error and pass geo verification headers
(#16874)
* [vlive] Fix live streams extraction (#16871)
* [npo] Fix typo (#16872)
+ [mediaset] Add support for new videos and extract all formats (#16568)
* [dctptv] Restore extraction based on REST API (#16850)
* [svt] Improve extraction and add support for pages (#16802)
* [porncom] Fix extraction (#16808)
version 2018.06.25
Extractors
* [joj] Relax URL regular expression (#16771)
* [brightcove] Workaround sonyliv DRM protected videos (#16807)
* [motherless] Fix extraction (#16786)
* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780)
- [foxnews:insider] Remove extractor (#15810)
+ [foxnews] Add support for iframe embeds (#15810, #16711)
version 2018.06.19
Core
+ [extractor/common] Introduce expected_status in _download_* methods
for convenient accept of HTTP requests failed with non 2xx status codes
+ [compat] Introduce compat_integer_types
Extractors
* [peertube] Improve generic support (#16733)
+ [6play] Use geo verification headers
* [rtbf] Fix extraction for python 3.2
* [vgtv] Improve HLS formats extraction
+ [vgtv] Add support for www.aftonbladet.se/tv URLs
* [bbccouk] Use expected_status
* [markiza] Expect 500 HTTP status code
* [tvnow] Try all clear manifest URLs (#15361)
version 2018.06.18
Core
* [downloader/rtmp] Fix downloading in verbose mode (#16736)
Extractors
+ [markiza] Add support for markiza.sk (#16750)
* [wat] Try all supported adaptive URLs
+ [6play] Add support for rtlplay.be and extract hd usp formats
+ [rtbf] Add support for audio and live streams (#9638, #11923)
+ [rtbf] Extract HLS, DASH and all HTTP formats
+ [rtbf] Extract subtitles
+ [rtbf] Fixup specific HTTP URLs (#16101)
+ [expressen] Add support for expressen.se
* [vidzi] Fix extraction (#16678)
* [pbs] Improve extraction (#16623, #16684)
* [bilibili] Restrict cid regular expression (#16638, #16734)
version 2018.06.14
Core
* [downloader/http] Fix retry on error when streaming to stdout (#16699)
Extractors
+ [discoverynetworks] Add support for disco-api videos (#16724)
+ [dailymotion] Add support for password protected videos (#9789)
+ [abc:iview] Add support for livestreams (#12354)
* [abc:iview] Fix extraction (#16704)
+ [crackle] Add support for sonycrackle.com (#16698)
+ [tvnet] Add support for tvnet.gov.vn (#15462)
* [nrk] Update API hosts and try all previously known ones (#16690)
* [wimp] Fix Youtube embeds extraction
version 2018.06.11
Extractors
* [npo] Extend URL regular expression and add support for npostart.nl (#16682)
+ [inc] Add support for another embed schema (#16666)
* [tv4] Fix format extraction (#16650)
+ [nexx] Add support for free cdn (#16538)
+ [pbs] Add another cove id pattern (#15373)
+ [rbmaradio] Add support for 192k format (#16631)
version 2018.06.04
Extractors
+ [camtube] Add support for camtube.co
+ [twitter:card] Extract guest token (#16609)
+ [chaturbate] Use geo verification headers
+ [bbc] Add support for bbcthree (#16612)
* [youtube] Move metadata extraction after video availability check
+ [youtube] Extract track and artist
+ [safari] Add support for new URL schema (#16614)
* [adn] Fix extraction
version 2018.06.02
Core
* [utils] Improve determine_ext
Extractors
+ [facebook] Add support for tahoe player videos (#15441, #16554)
* [cbc] Improve extraction (#16583, #16593)
* [openload] Improve ext extraction (#16595)
+ [twitter:card] Add support for another endpoint (#16586)
+ [openload] Add support for oload.win and oload.download (#16592)
* [audimedia] Fix extraction (#15309)
+ [francetv] Add support for sport.francetvinfo.fr (#15645)
* [mlb] Improve extraction (#16587)
- [nhl] Remove old extractors
* [rbmaradio] Check formats availability (#16585)
version 2018.05.30
Core
* [downloader/rtmp] Generalize download messages and report time elapsed
on finish
* [downloader/rtmp] Gracefully handle live streams interrupted by user
Extractors
* [teamcoco] Fix extraction for full episodes (#16573)
* [spiegel] Fix info extraction (#16538)
+ [apa] Add support for apa.at (#15041, #15672)
+ [bellmedia] Add support for bnnbloomberg.ca (#16560)
+ [9c9media] Extract MPD formats and subtitles
* [cammodels] Use geo verification headers
+ [ufctv] Add support for authentication (#16542)
+ [cammodels] Add support for cammodels.com (#14499)
* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt
(#16551)
* [soundcloud] Detect format extension (#16549)
* [cbc] Fix playlist title extraction (#16502)
+ [tumblr] Detect and report sensitive media (#13829)
+ [tumblr] Add support for authentication (#15133)
version 2018.05.26
Core

View File

@@ -17,7 +17,7 @@ youtube-dl - download videos from youtube.com or other video platforms
# INSTALLATION
To install it right away for all UNIX users (Linux, OS X, etc.), type:
To install it right away for all UNIX users (Linux, macOS, etc.), type:
sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
sudo chmod a+rx /usr/local/bin/youtube-dl
@@ -35,7 +35,7 @@ You can also use pip:
This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
OS X users can install youtube-dl with [Homebrew](https://brew.sh/):
macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
brew install youtube-dl
@@ -427,9 +427,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
default; fix file if we can, warn
otherwise)
--prefer-avconv Prefer avconv over ffmpeg for running the
postprocessors (default)
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors
--prefer-ffmpeg Prefer ffmpeg over avconv for running the
postprocessors (default)
--ffmpeg-location PATH Location of the ffmpeg/avconv binary;
either the path to the binary or its
containing directory.
@@ -442,7 +442,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
# CONFIGURATION
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
```
@@ -870,7 +870,7 @@ Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the op
Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [Export Cookies](https://addons.mozilla.org/en-US/firefox/addon/export-cookies/) (for Firefox).
In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.

View File

@@ -13,7 +13,7 @@ year = str(datetime.datetime.now().year)
for fn in glob.glob('*.html*'):
with io.open(fn, encoding='utf-8') as f:
content = f.read()
newc = re.sub(r'(?P<copyright>Copyright © 2006-)(?P<year>[0-9]{4})', 'Copyright © 2006-' + year, content)
newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content)
if content != newc:
tmpFn = fn + '.part'
with io.open(tmpFn, 'wt', encoding='utf-8') as outf:

View File

@@ -15,7 +15,6 @@
- **8tracks**
- **91porn**
- **9c9media**
- **9c9media:stack**
- **9gag**
- **9now.com.au**
- **abc.net.au**
@@ -48,6 +47,7 @@
- **anitube.se**
- **Anvato**
- **AnySex**
- **APA**
- **Aparat**
- **AppleConnect**
- **AppleDaily**: 臺灣蘋果日報
@@ -56,6 +56,7 @@
- **archive.org**: archive.org videos
- **ARD**
- **ARD:mediathek**
- **ARDBetaMediathek**
- **Arkena**
- **arte.tv**
- **arte.tv:+7**
@@ -108,6 +109,8 @@
- **BiliBili**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
- **BitChuteChannel**
- **BleacherReport**
- **BleacherReportCMS**
- **blinkx**
@@ -128,6 +131,8 @@
- **BYUtv**
- **Camdemy**
- **CamdemyFolder**
- **CamModels**
- **CamTube**
- **CamWithHer**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
@@ -187,7 +192,7 @@
- **Crackle**
- **Criterion**
- **CrooksAndLiars**
- **Crunchyroll**
- **crunchyroll**
- **crunchyroll:playlist**
- **CSNNE**
- **CSpan**: C-SPAN
@@ -264,6 +269,7 @@
- **Europa**
- **EveryonesMixtape**
- **ExpoTV**
- **Expressen**
- **ExtremeTube**
- **EyedoTV**
- **facebook**
@@ -287,7 +293,6 @@
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
- **foxnews:insider**
- **FoxSports**
- **france2.fr:generation-what**
- **FranceCulture**
@@ -300,6 +305,9 @@
- **Freesound**
- **freespeech.org**
- **FreshLive**
- **FrontendMasters**
- **FrontendMastersCourse**
- **FrontendMastersLesson**
- **Funimation**
- **FunkChannel**
- **FunkMix**
@@ -400,6 +408,7 @@
- **Ketnet**
- **KhanAcademy**
- **KickStarter**
- **KinoPoisk**
- **KonserthusetPlay**
- **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
@@ -453,6 +462,8 @@
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
- **Markiza**
- **MarkizaPage**
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
@@ -552,9 +563,6 @@
- **nfl.com**
- **NhkVod**
- **nhl.com**
- **nhl.com:news**: NHL news
- **nhl.com:videocenter**
- **nhl.com:videocenter:category**: NHL videocenter category
- **nick.com**
- **nick.de**
- **nickelodeon:br**
@@ -573,6 +581,7 @@
- **Normalboots**
- **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
- **nowness**
- **nowness:playlist**
- **nowness:series**
@@ -588,7 +597,9 @@
- **NRKSkole**: NRK Skole
- **NRKTV**: NRK TV and NRK Radio
- **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
- **NRKTVEpisode**
- **NRKTVEpisodes**
- **NRKTVSeason**
- **NRKTVSeries**
- **ntv.ru**
- **Nuvid**
@@ -666,6 +677,8 @@
- **PrimeShareTV**
- **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **puhutv:serie**
- **Puls4**
- **Pyvideo**
- **qqmusic**: QQ音乐
@@ -688,6 +701,7 @@
- **RaiPlayLive**
- **RaiPlayPlaylist**
- **RayWenderlich**
- **RayWenderlichCourse**
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
@@ -792,6 +806,7 @@
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- **SportBoxEmbed**
- **SportDeutschland**
@@ -811,6 +826,7 @@
- **StretchInternet**
- **SunPorno**
- **SVT**
- **SVTPage**
- **SVTPlay**: SVT Play and Öppet arkiv
- **SVTSeries**
- **SWRMediathek**
@@ -893,6 +909,7 @@
- **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com**
- **TVN24**
- **TVNet**
- **TVNoe**
- **TVNow**
- **TVNowList**
@@ -901,6 +918,7 @@
- **tvp:embed**: Telewizja Polska
- **tvp:series**
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
- **twitch:chapter**
- **twitch:clips**
@@ -990,6 +1008,7 @@
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **Viqeo**
- **Viu**
- **viu:ott**
- **viu:playlist**
@@ -1081,6 +1100,7 @@
- **YouNowLive**
- **YouNowMoment**
- **YouPorn**
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube.com
- **youtube:channel**: YouTube.com channels

View File

@@ -2,5 +2,5 @@
universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
ignore = E402,E501,E731,E741

View File

@@ -78,6 +78,7 @@ from youtube_dl.utils import (
uppercase_escape,
lowercase_escape,
url_basename,
url_or_none,
base_url,
urljoin,
urlencode_postdata,
@@ -361,6 +362,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
self.assertEqual(determine_ext('foobar', None), None)
def test_find_xpath_attr(self):
testxml = '''<root>
@@ -506,6 +508,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
def test_url_or_none(self):
self.assertEqual(url_or_none(None), None)
self.assertEqual(url_or_none(''), None)
self.assertEqual(url_or_none('foo'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
self.assertEqual(url_or_none('http$://foo.de'), None)
self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
self.assertEqual(url_or_none('//foo.de'), '//foo.de')
def test_parse_age_limit(self):
self.assertEqual(parse_age_limit(None), None)
self.assertEqual(parse_age_limit(False), None)
@@ -716,6 +728,10 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
stripped = strip_jsonp('({"status": "success"});')
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape(''), '')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')

View File

@@ -305,8 +305,8 @@ class YoutubeDL(object):
http_chunk_size.
The following options are used by the post processors:
prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
otherwise prefer avconv.
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
otherwise prefer ffmpeg.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.

View File

@@ -2787,6 +2787,12 @@ except NameError: # Python 3
compat_numeric_types = (int, float, complex)
try:
compat_integer_types = (int, long)
except NameError: # Python 3
compat_integer_types = (int, )
if sys.version_info < (2, 7):
def compat_socket_create_connection(address, timeout, source_address=None):
host, port = address
@@ -2974,6 +2980,7 @@ __all__ = [
'compat_http_client',
'compat_http_server',
'compat_input',
'compat_integer_types',
'compat_itertools_count',
'compat_kwargs',
'compat_numeric_types',

View File

@@ -2,7 +2,10 @@ from __future__ import unicode_literals
from .fragment import FragmentFD
from ..compat import compat_urllib_error
from ..utils import urljoin
from ..utils import (
DownloadError,
urljoin,
)
class DashSegmentsFD(FragmentFD):
@@ -57,6 +60,14 @@ class DashSegmentsFD(FragmentFD):
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
except DownloadError:
# Don't retry fragment if error occurred during HTTP downloading
# itself since it has own retry settings
if not fatal:
self.report_skip_fragment(frag_index)
break
raise
if count > fragment_retries:
if not fatal:
self.report_skip_fragment(frag_index)

View File

@@ -217,10 +217,11 @@ class HttpFD(FileDownloader):
before = start # start measuring
def retry(e):
if ctx.tmpfilename != '-':
to_stdout = ctx.tmpfilename == '-'
if not to_stdout:
ctx.stream.close()
ctx.stream = None
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e)
while True:

View File

@@ -29,66 +29,68 @@ class RtmpFD(FileDownloader):
proc = subprocess.Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
while not proc_stderr_closed:
# read line from stderr
line = ''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
time_now = time.time()
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
try:
while not proc_stderr_closed:
# read line from stderr
line = ''
while True:
char = proc.stderr.read(1)
if not char:
proc_stderr_closed = True
break
if char in [b'\r', b'\n']:
break
line += char.decode('ascii', 'replace')
if not line:
# proc_stderr_closed is True
continue
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
percent = float(mobj.group(2))
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
self._hook_progress({
'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
proc.wait()
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
if mobj:
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
'elapsed': time_now - start,
'speed': speed,
})
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
finally:
proc.wait()
if not cursor_in_new_line:
self.to_screen('')
return proc.returncode
@@ -163,7 +165,15 @@ class RtmpFD(FileDownloader):
RD_INCOMPLETE = 2
RD_NO_CONNECT = 3
retval = run_rtmpdump(args)
started = time.time()
try:
retval = run_rtmpdump(args)
except KeyboardInterrupt:
if not info_dict.get('is_live'):
raise
retval = RD_SUCCESS
self.to_screen('\n[rtmpdump] Interrupted by user')
if retval == RD_NO_CONNECT:
self.report_error('[rtmpdump] Could not connect to RTMP server.')
@@ -171,7 +181,7 @@ class RtmpFD(FileDownloader):
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
args = basic_args + ['--resume']
if retval == RD_FAILED:
@@ -188,13 +198,14 @@ class RtmpFD(FileDownloader):
break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % fsize)
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
'elapsed': time.time() - started,
})
return True
else:

View File

@@ -105,22 +105,22 @@ class ABCIE(InfoExtractor):
class ABCIViewIE(InfoExtractor):
IE_NAME = 'abc.net.au:iview'
_VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
_GEO_COUNTRIES = ['AU']
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
'url': 'https://iview.abc.net.au/programs/ben-and-hollys-little-kingdom/ZY9247A021S00',
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
'info_dict': {
'id': 'ZY9247A021S00',
'id': 'ZX9371A050S00',
'ext': 'mp4',
'title': "Gaston's Visit",
'title': "Gaston's Birthday",
'series': "Ben And Holly's Little Kingdom",
'description': 'md5:18db170ad71cf161e006a4c688e33155',
'upload_date': '20180318',
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
'upload_date': '20180604',
'uploader_id': 'abc4kids',
'timestamp': 1521400959,
'timestamp': 1528140219,
},
'params': {
'skip_download': True,
@@ -129,17 +129,16 @@ class ABCIViewIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_params = self._parse_json(self._search_regex(
r'videoParams\s*=\s*({.+?});', webpage, 'video params'), video_id)
title = video_params.get('title') or video_params['seriesTitle']
stream = next(s for s in video_params['playlist'] if s.get('type') == 'program')
video_params = self._download_json(
'https://iview.abc.net.au/api/programs/' + video_id, video_id)
title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
house_number = video_params.get('episodeHouseNumber')
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-mobile'.format(
house_number = video_params.get('episodeHouseNumber') or video_id
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
int(time.time()), house_number)
sig = hmac.new(
'android.content.res.Resources'.encode('utf-8'),
b'android.content.res.Resources',
path.encode('utf-8'), hashlib.sha256).hexdigest()
token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
@@ -169,18 +168,26 @@ class ABCIViewIE(InfoExtractor):
'ext': 'vtt',
}]
is_live = video_params.get('livestream') == '1'
if is_live:
title = self._live_title(title)
return {
'id': video_id,
'title': unescapeHTML(title),
'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image:src'], webpage),
'title': title,
'description': video_params.get('description'),
'thumbnail': video_params.get('thumbnail'),
'duration': int_or_none(video_params.get('eventDuration')),
'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
'series': unescapeHTML(video_params.get('seriesTitle')),
'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
'episode_number': int_or_none(self._html_search_meta('episodeNumber', webpage, default=None)),
'episode': self._html_search_meta('episode_title', webpage, default=None),
'season_number': int_or_none(self._search_regex(
r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
'episode_number': int_or_none(self._search_regex(
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
'episode_id': house_number,
'uploader_id': video_params.get('channel'),
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
}

View File

@@ -1,8 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import binascii
import json
import os
import random
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
@@ -12,9 +15,12 @@ from ..compat import (
)
from ..utils import (
bytes_to_intlist,
bytes_to_long,
ExtractorError,
float_or_none,
intlist_to_bytes,
long_to_bytes,
pkcs1pad,
srt_subtitles_timecode,
strip_or_none,
urljoin,
@@ -35,6 +41,7 @@ class ADNIE(InfoExtractor):
}
}
_BASE_URL = 'http://animedigitalnetwork.fr'
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
@@ -42,16 +49,14 @@ class ADNIE(InfoExtractor):
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path),
video_id, fatal=False, headers={
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
})
video_id, fatal=False)
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
bytes_to_intlist(b'\xc8\x6e\x06\xbc\xbe\xc6\x49\xf5\x88\x0d\xc8\x47\xc4\x27\x0c\x60'),
bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
@@ -112,11 +117,24 @@ class ADNIE(InfoExtractor):
error = None
if not links:
links_url = player_config.get('linksurl') or options['videoUrl']
links_data = self._download_json(urljoin(
self._BASE_URL, links_url), video_id)
token = options['token']
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
message = bytes_to_intlist(json.dumps({
'k': self._K,
'e': 60,
't': token,
}))
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
n, e = self._RSA_KEY
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode()
links_data = self._download_json(
urljoin(self._BASE_URL, links_url), video_id, headers={
'Authorization': 'Bearer ' + authorization,
})
links = links_data.get('links') or {}
metas = metas or links_data.get('meta') or {}
sub_path = sub_path or links_data.get('subtitles')
sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
error = links_data.get('error')
title = metas.get('title') or video_info['title']

View File

@@ -7,6 +7,7 @@ from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
strip_or_none,
url_or_none,
)
@@ -98,7 +99,7 @@ class AdultSwimIE(TurnerBaseIE):
if not video_id:
entries = []
for episode in video_data.get('archiveEpisodes', []):
episode_url = episode.get('url')
episode_url = url_or_none(episode.get('url'))
if not episode_url:
continue
entries.append(self.url_result(

View File

@@ -9,6 +9,7 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
url_or_none,
urlencode_postdata,
xpath_text,
)
@@ -304,7 +305,7 @@ class AfreecaTVIE(InfoExtractor):
file_elements = video_element.findall(compat_xpath('./file'))
one = len(file_elements) == 1
for file_num, file_element in enumerate(file_elements, start=1):
file_url = file_element.text
file_url = url_or_none(file_element.text)
if not file_url:
continue
key = file_element.get('key', '')

View File

@@ -3,11 +3,12 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
mimetype2ext,
determine_ext,
ExtractorError,
int_or_none,
mimetype2ext,
parse_iso8601,
url_or_none,
)
@@ -35,7 +36,7 @@ class AMPIE(InfoExtractor):
media_thumbnail = [media_thumbnail]
for thumbnail_data in media_thumbnail:
thumbnail = thumbnail_data.get('@attributes', {})
thumbnail_url = thumbnail.get('url')
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
thumbnails.append({
@@ -51,7 +52,7 @@ class AMPIE(InfoExtractor):
media_subtitle = [media_subtitle]
for subtitle_data in media_subtitle:
subtitle = subtitle_data.get('@attributes', {})
subtitle_href = subtitle.get('href')
subtitle_href = url_or_none(subtitle.get('href'))
if not subtitle_href:
continue
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
@@ -65,7 +66,7 @@ class AMPIE(InfoExtractor):
media_content = [media_content]
for media_data in media_content:
media = media_data.get('@attributes', {})
media_url = media.get('url')
media_url = url_or_none(media.get('url'))
if not media_url:
continue
ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
@@ -79,7 +80,7 @@ class AMPIE(InfoExtractor):
else:
formats.append({
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),
'url': media['url'],
'url': media_url,
'tbr': int_or_none(media.get('bitrate')),
'filesize': int_or_none(media.get('fileSize')),
'ext': ext,

View File

@@ -8,6 +8,7 @@ from ..utils import (
determine_ext,
extract_attributes,
ExtractorError,
url_or_none,
urlencode_postdata,
urljoin,
)
@@ -52,7 +53,7 @@ class AnimeOnDemandIE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
@@ -165,7 +166,7 @@ class AnimeOnDemandIE(InfoExtractor):
}, fatal=False)
if not playlist:
continue
stream_url = playlist.get('streamurl')
stream_url = url_or_none(playlist.get('streamurl'))
if stream_url:
rtmp = re.search(
r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',

View File

@@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
}
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
_TESTS = [{
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
'info_dict': {
'id': '4465496',
'ext': 'mp4',
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
'duration': 22,
'timestamp': 1534855680,
'upload_date': '20180821',
'uploader': 'ANV',
},
'params': {
'skip_download': True,
},
}, {
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
'only_matching': True,
}]
def __init__(self, *args, **kwargs):
super(AnvatoIE, self).__init__(*args, **kwargs)
self.__server_time = None
@@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
'api': {
'anvrid': anvrid,
'anvstk': md5_text('%s|%s|%d|%s' % (
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
access_key, anvrid, server_time,
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
'anvts': server_time,
},
}
@@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE:
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
access_key) or access_key
return self._get_anvato_videos(access_key, video_id)

View File

@@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
)
@@ -77,7 +78,7 @@ class AolIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
for rendition in video_data.get('renditions', []):
video_url = rendition.get('url')
video_url = url_or_none(rendition.get('url'))
if not video_url:
continue
ext = rendition.get('format')

View File

@@ -0,0 +1,94 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
js_to_json,
url_or_none,
)
class APAIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
'info_dict': {
'id': 'jjv85FdZ',
'ext': 'mp4',
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 254,
'timestamp': 1519211149,
'upload_date': '20180221',
},
}, {
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
'only_matching': True,
}, {
'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
'only_matching': True,
}, {
'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
jwplatform_id = self._search_regex(
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
'jwplatform id', default=None)
if jwplatform_id:
return self.url_result(
'jwplatform:' + jwplatform_id, ie='JWPlatform',
video_id=video_id)
sources = self._parse_json(
self._search_regex(
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
video_id, transform_source=js_to_json)
formats = []
for source in sources:
if not isinstance(source, dict):
continue
source_url = url_or_none(source.get('file'))
if not source_url:
continue
ext = determine_ext(source_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
else:
formats.append({
'url': source_url,
})
self._sort_formats(formats)
thumbnail = self._search_regex(
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='url')
return {
'id': video_id,
'title': video_id,
'thumbnail': thumbnail,
'formats': formats,
}

View File

@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
mimetype2ext,
url_or_none,
)
@@ -43,7 +44,7 @@ class AparatIE(InfoExtractor):
formats = []
for item in file_list[0]:
file_url = item.get('file')
file_url = url_or_none(item.get('file'))
if not file_url:
continue
ext = mimetype2ext(item.get('type'))

View File

@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from .generic import GenericIE
from ..compat import compat_str
from ..utils import (
determine_ext,
ExtractorError,
@@ -15,13 +14,14 @@ from ..utils import (
unified_strdate,
xpath_text,
update_url_query,
url_or_none,
)
from ..compat import compat_etree_fromstring
class ARDMediathekIE(InfoExtractor):
IE_NAME = 'ARD:mediathek'
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
# available till 26.07.2022
@@ -37,6 +37,9 @@ class ARDMediathekIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
}, {
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
'only_matching': True,
}, {
# audio
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
@@ -100,7 +103,7 @@ class ARDMediathekIE(InfoExtractor):
quality = stream.get('_quality')
server = stream.get('_server')
for stream_url in stream_urls:
if not isinstance(stream_url, compat_str) or '//' not in stream_url:
if not url_or_none(stream_url):
continue
ext = determine_ext(stream_url)
if quality != 'auto' and ext in ('f4m', 'm3u8'):
@@ -282,3 +285,76 @@ class ARDIE(InfoExtractor):
'upload_date': upload_date,
'thumbnail': thumbnail,
}
class ARDBetaMediathekIE(InfoExtractor):
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
_TESTS = [{
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
'info_dict': {
'display_id': 'die-robuste-roswita',
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'title': 'Tatort: Die robuste Roswita',
'description': r're:^Der Mord.*trüber ist als die Ilm.',
'duration': 5316,
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
'upload_date': '20180826',
'ext': 'mp4',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
data = self._parse_json(data_json, display_id)
res = {
'id': video_id,
'display_id': display_id,
}
formats = []
for widget in data.values():
if widget.get('_geoblocked'):
raise ExtractorError('This video is not available due to geoblocking', expected=True)
if '_duration' in widget:
res['duration'] = widget['_duration']
if 'clipTitle' in widget:
res['title'] = widget['clipTitle']
if '_previewImage' in widget:
res['thumbnail'] = widget['_previewImage']
if 'broadcastedOn' in widget:
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
if 'synopsis' in widget:
res['description'] = widget['synopsis']
if '_subtitleUrl' in widget:
res['subtitles'] = {'de': [{
'ext': 'ttml',
'url': widget['_subtitleUrl'],
}]}
if '_quality' in widget:
format_url = widget['_stream']['json'][0]
if format_url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(
format_url + '?hdcore=3.11.0',
video_id, f4m_id='hds', fatal=False))
elif format_url.endswith('m3u8'):
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
formats.append({
'format_id': 'http-' + widget['_quality'],
'url': format_url,
'preference': 10, # Plain HTTP, that's nice
})
self._sort_formats(formats)
res['formats'] = formats
return res

View File

@@ -74,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@@ -5,13 +5,12 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
sanitized_Request,
)
class AudiMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': {
@@ -24,41 +23,46 @@ class AudiMediaIE(InfoExtractor):
'duration': 74022,
'view_count': int,
}
}
# extracted from https://audimedia.tv/assets/embed/embedded-player.js (dataSourceAuthToken)
_AUTH_TOKEN = 'e25b42847dba18c6c8816d5d8ce94c326e06823ebf0859ed164b3ba169be97f2'
}, {
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
raw_payload = self._search_regex([
r'class="amtv-embed"[^>]+id="([^"]+)"',
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
], webpage, 'raw payload')
_, stage_mode, video_id, lang = raw_payload.split('-')
_, stage_mode, video_id, _ = raw_payload.split('-')
# TODO: handle s and e stage_mode (live streams and ended live streams)
if stage_mode not in ('s', 'e'):
request = sanitized_Request(
'https://audimedia.tv/api/video/v1/videos/%s?embed[]=video_versions&embed[]=thumbnail_image&where[content_language_iso]=%s' % (video_id, lang),
headers={'X-Auth-Token': self._AUTH_TOKEN})
json_data = self._download_json(request, video_id)['results']
video_data = self._download_json(
'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
video_id, query={
'embed[]': ['video_versions', 'thumbnail_image'],
})['results']
formats = []
stream_url_hls = json_data.get('stream_url_hls')
stream_url_hls = video_data.get('stream_url_hls')
if stream_url_hls:
formats.extend(self._extract_m3u8_formats(
stream_url_hls, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
stream_url_hds = json_data.get('stream_url_hds')
stream_url_hds = video_data.get('stream_url_hds')
if stream_url_hds:
formats.extend(self._extract_f4m_formats(
stream_url_hds + '?hdcore=3.4.0',
video_id, f4m_id='hds', fatal=False))
for video_version in json_data.get('video_versions'):
for video_version in video_data.get('video_versions', []):
video_version_url = video_version.get('download_url') or video_version.get('stream_url')
if not video_version_url:
continue
@@ -79,11 +83,11 @@ class AudiMediaIE(InfoExtractor):
return {
'id': video_id,
'title': json_data['title'],
'description': json_data.get('subtitle'),
'thumbnail': json_data.get('thumbnail_image', {}).get('file'),
'timestamp': parse_iso8601(json_data.get('publication_date')),
'duration': int_or_none(json_data.get('duration')),
'view_count': int_or_none(json_data.get('view_count')),
'title': video_data['title'],
'description': video_data.get('subtitle'),
'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
'timestamp': parse_iso8601(video_data.get('publication_date')),
'duration': int_or_none(video_data.get('duration')),
'view_count': int_or_none(video_data.get('view_count')),
'formats': formats,
}

View File

@@ -44,7 +44,7 @@ class BambuserIE(InfoExtractor):
}
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

View File

@@ -1,6 +1,5 @@
from __future__ import unicode_literals
import json
import random
import re
import time
@@ -16,14 +15,18 @@ from ..utils import (
int_or_none,
KNOWN_EXTENSIONS,
parse_filesize,
str_or_none,
try_get,
unescapeHTML,
update_url_query,
unified_strdate,
unified_timestamp,
url_or_none,
)
class BandcampIE(InfoExtractor):
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
_TESTS = [{
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
'md5': 'c557841d5e50261777a6585648adf439',
@@ -35,13 +38,44 @@ class BandcampIE(InfoExtractor):
},
'_skip': 'There is a limit of 200 free downloads / month for the test song'
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
'md5': '853e35bf34aa1d6fe2615ae612564b36',
'info_dict': {
'id': '2650410135',
'ext': 'aiff',
'title': 'Ben Prunty - Lanius (Battle)',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Ben Prunty',
'timestamp': 1396508491,
'upload_date': '20140403',
'release_date': '20140403',
'duration': 260.877,
'track': 'Lanius (Battle)',
'track_number': 1,
'track_id': '2650410135',
'artist': 'Ben Prunty',
'album': 'FTL: Advanced Edition Soundtrack',
},
}, {
# no free download, mp3 128
'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
'info_dict': {
'id': '2584466013',
'ext': 'mp3',
'title': 'Mastodon - Hail to Fire',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Mastodon',
'timestamp': 1322005399,
'upload_date': '20111122',
'release_date': '20040207',
'duration': 120.79,
'track': 'Hail to Fire',
'track_number': 5,
'track_id': '2584466013',
'artist': 'Mastodon',
'album': 'Call of the Mastodon',
},
}]
@@ -50,19 +84,23 @@ class BandcampIE(InfoExtractor):
title = mobj.group('title')
webpage = self._download_webpage(url, title)
thumbnail = self._html_search_meta('og:image', webpage, default=None)
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
if not m_download:
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
if m_trackinfo:
json_code = m_trackinfo.group(1)
data = json.loads(json_code)[0]
track_id = compat_str(data['id'])
if not data.get('file'):
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
track_id = None
track = None
track_number = None
duration = None
formats = []
for format_id, format_url in data['file'].items():
formats = []
track_info = self._parse_json(
self._search_regex(
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
webpage, 'track info', default='{}'), title)
if track_info:
file_ = track_info.get('file')
if isinstance(file_, dict):
for format_id, format_url in file_.items():
if not url_or_none(format_url):
continue
ext, abr_str = format_id.split('-', 1)
formats.append({
'format_id': format_id,
@@ -72,85 +110,110 @@ class BandcampIE(InfoExtractor):
'acodec': ext,
'abr': int_or_none(abr_str),
})
track = track_info.get('title')
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
track_number = int_or_none(track_info.get('track_num'))
duration = float_or_none(track_info.get('duration'))
self._sort_formats(formats)
def extract(key):
return self._search_regex(
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
webpage, key, default=None, group='value')
return {
'id': track_id,
'title': data['title'],
'thumbnail': thumbnail,
'formats': formats,
'duration': float_or_none(data.get('duration')),
}
else:
raise ExtractorError('No free songs found')
artist = extract('artist')
album = extract('album_title')
timestamp = unified_timestamp(
extract('publish_date') or extract('album_publish_date'))
release_date = unified_strdate(extract('album_release_date'))
download_link = m_download.group(1)
video_id = self._search_regex(
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'video id')
download_link = self._search_regex(
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'download link', default=None, group='url')
if download_link:
track_id = self._search_regex(
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
webpage, 'track id')
download_webpage = self._download_webpage(
download_link, video_id, 'Downloading free downloads page')
download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page')
blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
video_id, transform_source=unescapeHTML)
blob = self._parse_json(
self._search_regex(
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
'blob', group='blob'),
track_id, transform_source=unescapeHTML)
info = blob['digital_items'][0]
info = try_get(
blob, (lambda x: x['digital_items'][0],
lambda x: x['download_items'][0]), dict)
if info:
downloads = info.get('downloads')
if isinstance(downloads, dict):
if not track:
track = info.get('title')
if not artist:
artist = info.get('artist')
if not thumbnail:
thumbnail = info.get('thumb_url')
downloads = info['downloads']
track = info['title']
download_formats = {}
download_formats_list = blob.get('download_formats')
if isinstance(download_formats_list, list):
for f in blob['download_formats']:
name, ext = f.get('name'), f.get('file_extension')
if all(isinstance(x, compat_str) for x in (name, ext)):
download_formats[name] = ext.strip('.')
artist = info.get('artist')
title = '%s - %s' % (artist, track) if artist else track
for format_id, f in downloads.items():
format_url = f.get('url')
if not format_url:
continue
# Stat URL generation algorithm is reverse engineered from
# download_*_bundle_*.js
stat_url = update_url_query(
format_url.replace('/download/', '/statdownload/'), {
'.rand': int(time.time() * 1000 * random.random()),
})
format_id = f.get('encoding_name') or format_id
stat = self._download_json(
stat_url, track_id, 'Downloading %s JSON' % format_id,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
fatal=False)
if not stat:
continue
retry_url = url_or_none(stat.get('retry_url'))
if not retry_url:
continue
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
'format_note': f.get('description'),
'filesize': parse_filesize(f.get('size_mb')),
'vcodec': 'none',
})
download_formats = {}
for f in blob['download_formats']:
name, ext = f.get('name'), f.get('file_extension')
if all(isinstance(x, compat_str) for x in (name, ext)):
download_formats[name] = ext.strip('.')
formats = []
for format_id, f in downloads.items():
format_url = f.get('url')
if not format_url:
continue
# Stat URL generation algorithm is reverse engineered from
# download_*_bundle_*.js
stat_url = update_url_query(
format_url.replace('/download/', '/statdownload/'), {
'.rand': int(time.time() * 1000 * random.random()),
})
format_id = f.get('encoding_name') or format_id
stat = self._download_json(
stat_url, video_id, 'Downloading %s JSON' % format_id,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
fatal=False)
if not stat:
continue
retry_url = stat.get('retry_url')
if not isinstance(retry_url, compat_str):
continue
formats.append({
'url': self._proto_relative_url(retry_url, 'http:'),
'ext': download_formats.get(format_id),
'format_id': format_id,
'format_note': f.get('description'),
'filesize': parse_filesize(f.get('size_mb')),
'vcodec': 'none',
})
self._sort_formats(formats)
title = '%s - %s' % (artist, track) if artist else track
if not duration:
duration = float_or_none(self._html_search_meta(
'duration', webpage, default=None))
return {
'id': video_id,
'id': track_id,
'title': title,
'thumbnail': info.get('thumb_url') or thumbnail,
'uploader': info.get('artist'),
'artist': artist,
'thumbnail': thumbnail,
'uploader': artist,
'timestamp': timestamp,
'release_date': release_date,
'duration': duration,
'track': track,
'track_number': track_number,
'track_id': track_id,
'artist': artist,
'album': album,
'formats': formats,
}
@@ -306,7 +369,7 @@ class BandcampWeeklyIE(InfoExtractor):
formats = []
for format_id, format_url in show['audio_stream'].items():
if not isinstance(format_url, compat_str):
if not url_or_none(format_url):
continue
for known_ext in KNOWN_EXTENSIONS:
if known_ext in format_id:

View File

@@ -12,6 +12,7 @@ from ..utils import (
float_or_none,
get_element_by_class,
int_or_none,
js_to_json,
parse_duration,
parse_iso8601,
try_get,
@@ -20,7 +21,6 @@ from ..utils import (
urljoin,
)
from ..compat import (
compat_etree_fromstring,
compat_HTTPError,
compat_urlparse,
)
@@ -29,7 +29,7 @@ from ..compat import (
class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'[pbw][\da-z]{7}'
_ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
@@ -236,6 +236,12 @@ class BBCCoUkIE(InfoExtractor):
}, {
'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/programmes/m00005xn',
'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
'only_matching': True,
}]
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
@@ -333,14 +339,9 @@ class BBCCoUkIE(InfoExtractor):
self._raise_extractor_error(last_exception)
def _download_media_selector_url(self, url, programme_id=None):
try:
media_selection = self._download_xml(
url, programme_id, 'Downloading media selection XML')
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code in (403, 404):
media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
else:
raise
media_selection = self._download_xml(
url, programme_id, 'Downloading media selection XML',
expected_status=(403, 404))
return self._process_media_selector(media_selection, programme_id)
def _process_media_selector(self, media_selection, programme_id):
@@ -772,6 +773,28 @@ class BBCIE(BBCCoUkIE):
# single video article embedded with data-media-vpid
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
'only_matching': True,
}, {
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
'info_dict': {
'id': 'p06556y7',
'ext': 'mp4',
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
},
'params': {
'skip_download': True,
}
}, {
# window.__PRELOADED_STATE__
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
'info_dict': {
'id': 'b0b9z4vz',
'ext': 'mp4',
'title': 'Prom 6: An American in Paris and Turangalila',
'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
}]
@classmethod
@@ -994,6 +1017,66 @@ class BBCIE(BBCCoUkIE):
'subtitles': subtitles,
}
preload_state = self._parse_json(self._search_regex(
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if preload_state:
current_programme = preload_state.get('programmes', {}).get('current') or {}
programme_id = current_programme.get('id')
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
self._sort_formats(formats)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
current_programme.get('duration', {}).get('value'))
thumbnail = None
image_url = current_programme.get('image_url')
if image_url:
thumbnail = image_url.replace('{recipe}', '1920x1920')
return {
'id': programme_id,
'title': title,
'description': dict_get(synopses, ('long', 'medium', 'short')),
'thumbnail': thumbnail,
'duration': duration,
'uploader': network.get('short_title'),
'uploader_id': network.get('id'),
'formats': formats,
'subtitles': subtitles,
}
bbc3_config = self._parse_json(
self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
'bbcthree config', default='{}'),
playlist_id, transform_source=js_to_json, fatal=False)
if bbc3_config:
bbc3_playlist = try_get(
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
dict)
if bbc3_playlist:
playlist_title = bbc3_playlist.get('title') or playlist_title
thumbnail = bbc3_playlist.get('holdingImageURL')
entries = []
for bbc3_item in bbc3_playlist['items']:
programme_id = bbc3_item.get('versionID')
if not programme_id:
continue
formats, subtitles = self._download_media_selector(programme_id)
self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': playlist_title,
'thumbnail': thumbnail,
'timestamp': timestamp,
'formats': formats,
'subtitles': subtitles,
})
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),

View File

@@ -12,7 +12,7 @@ class BellMediaIE(InfoExtractor):
(?:
ctv|
tsn|
bnn|
bnn(?:bloomberg)?|
thecomedynetwork|
discovery|
discoveryvelocity|
@@ -27,17 +27,16 @@ class BellMediaIE(InfoExtractor):
much\.com
)/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'http://www.ctv.ca/video/player?vid=706966',
'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
'info_dict': {
'id': '706966',
'ext': 'mp4',
'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
'upload_date': '20150919',
'timestamp': 1442624700,
'id': '1403070',
'ext': 'flv',
'title': 'David Cockfield\'s Top Picks',
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
},
'expected_warnings': ['HTTP Error 404'],
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
'only_matching': True,
@@ -70,6 +69,7 @@ class BellMediaIE(InfoExtractor):
'investigationdiscovery': 'invdisc',
'animalplanet': 'aniplan',
'etalk': 'ctv',
'bnnbloomberg': 'bnn',
}
def _real_extract(self, url):

View File

@@ -114,7 +114,7 @@ class BiliBiliIE(InfoExtractor):
if 'anime/' not in url:
cid = self._search_regex(
r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
default=None
) or compat_parse_qs(self._search_regex(
[r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',

View File

@@ -0,0 +1,120 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
from ..utils import urlencode_postdata
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
'info_dict': {
'id': 'szoMrox2JEI',
'ext': 'mp4',
'title': 'Fuck bitches get money',
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Victoria X Rave',
},
}, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
'only_matching': True,
}, {
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
})
title = self._search_regex(
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'description', webpage, 'title',
default=None) or self._og_search_description(webpage)
formats = [
{'url': mobj.group('url')}
for mobj in re.finditer(
r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
self._sort_formats(formats)
description = self._html_search_regex(
r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'twitter:image:src', webpage, 'thumbnail')
uploader = self._html_search_regex(
r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
'uploader', fatal=False)
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
'formats': formats,
}
class BitChuteChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://www.bitchute.com/channel/victoriaxrave/',
'playlist_mincount': 185,
'info_dict': {
'id': 'victoriaxrave',
},
}
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
def _entries(self, channel_id):
channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
offset = 0
for page_num in itertools.count(1):
data = self._download_json(
'%sextend/' % channel_url, channel_id,
'Downloading channel page %d' % page_num,
data=urlencode_postdata({
'csrfmiddlewaretoken': self._TOKEN,
'name': '',
'offset': offset,
}), headers={
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'Referer': channel_url,
'X-Requested-With': 'XMLHttpRequest',
'Cookie': 'csrftoken=%s' % self._TOKEN,
})
if data.get('success') is False:
break
html = data.get('html')
if not html:
break
video_ids = re.findall(
r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
html)
if not video_ids:
break
offset += len(video_ids)
for video_id in video_ids:
yield self.url_result(
'https://www.bitchute.com/video/%s' % video_id,
ie=BitChuteIE.ie_key(), video_id=video_id)
def _real_extract(self, url):
channel_id = self._match_id(url)
return self.playlist_result(
self._entries(channel_id), playlist_id=channel_id)

View File

@@ -4,8 +4,10 @@ import re
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..compat import compat_str
from ..utils import int_or_none
from ..utils import (
int_or_none,
url_or_none,
)
class BreakIE(InfoExtractor):
@@ -55,8 +57,8 @@ class BreakIE(InfoExtractor):
formats = []
for video in content:
video_url = video.get('url')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(video.get('url'))
if not video_url:
continue
bitrate = int_or_none(self._search_regex(
r'(\d+)_kbps', video_url, 'tbr', default=None))

View File

@@ -572,7 +572,8 @@ class BrightcoveNewIE(AdobePassIE):
container = source.get('container')
ext = mimetype2ext(source.get('type'))
src = source.get('src')
if ext == 'ism' or container == 'WVM':
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
continue
elif ext == 'm3u8' or container == 'M2TS':
if not src:
@@ -629,6 +630,14 @@ class BrightcoveNewIE(AdobePassIE):
'format_id': build_format_id('rtmp'),
})
formats.append(f)
if not formats:
# for sonyliv.com DRM protected videos
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
if s3_source_url:
formats.append({
'url': s3_source_url,
'format_id': 'source',
})
errors = json_data.get('errors')
if not formats and errors:

View File

@@ -0,0 +1,96 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
)
class CamModelsIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
'only_matching': True,
}]
def _real_extract(self, url):
user_id = self._match_id(url)
webpage = self._download_webpage(
url, user_id, headers=self.geo_verification_headers())
manifest_root = self._html_search_regex(
r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)
if not manifest_root:
ERRORS = (
("I'm offline, but let's stay connected", 'This user is currently offline'),
('in a private show', 'This user is in a private show'),
('is currently performing LIVE', 'This model is currently performing live'),
)
for pattern, message in ERRORS:
if pattern in webpage:
error = message
expected = True
break
else:
error = 'Unable to find manifest URL root'
expected = False
raise ExtractorError(error, expected=expected)
manifest = self._download_json(
'%s%s.json' % (manifest_root, user_id), user_id)
formats = []
for format_id, format_dict in manifest['formats'].items():
if not isinstance(format_dict, dict):
continue
encodings = format_dict.get('encodings')
if not isinstance(encodings, list):
continue
vcodec = format_dict.get('videoCodec')
acodec = format_dict.get('audioCodec')
for media in encodings:
if not isinstance(media, dict):
continue
media_url = url_or_none(media.get('location'))
if not media_url:
continue
format_id_list = [format_id]
height = int_or_none(media.get('videoHeight'))
if height is not None:
format_id_list.append('%dp' % height)
f = {
'url': media_url,
'format_id': '-'.join(format_id_list),
'width': int_or_none(media.get('videoWidth')),
'height': height,
'vbr': int_or_none(media.get('videoKbps')),
'abr': int_or_none(media.get('audioKbps')),
'fps': int_or_none(media.get('fps')),
'vcodec': vcodec,
'acodec': acodec,
}
if 'rtmp' in format_id:
f['ext'] = 'flv'
elif 'hls' in format_id:
f.update({
'ext': 'mp4',
# hls skips fragments, preferring rtmp
'preference': -1,
})
else:
continue
formats.append(f)
self._sort_formats(formats)
return {
'id': user_id,
'title': self._live_title(user_id),
'is_live': True,
'formats': formats,
}

View File

@@ -0,0 +1,69 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_timestamp,
)
class CamTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
'info_dict': {
'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
'display_id': 'minafay-030618-1136-chaturbate-female',
'ext': 'mp4',
'title': 'minafay-030618-1136-chaturbate-female',
'duration': 1274,
'timestamp': 1528018608,
'upload_date': '20180603',
},
'params': {
'skip_download': True,
},
}]
_API_BASE = 'https://api.camtube.co'
def _real_extract(self, url):
display_id = self._match_id(url)
token = self._download_json(
'%s/rpc/session/new' % self._API_BASE, display_id,
'Downloading session token')['token']
self._set_cookie('api.camtube.co', 'session', token)
video = self._download_json(
'%s/recordings/%s' % (self._API_BASE, display_id), display_id,
headers={'Referer': url})
video_id = video['uuid']
timestamp = unified_timestamp(video.get('createdAt'))
duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('viewCount'))
like_count = int_or_none(video.get('likeCount'))
creator = video.get('stageName')
formats = [{
'url': '%s/recordings/%s/manifest.m3u8'
% (self._API_BASE, video_id),
'format_id': 'hls',
'ext': 'mp4',
'protocol': 'm3u8_native',
}]
return {
'id': video_id,
'display_id': display_id,
'title': display_id,
'timestamp': timestamp,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'creator': creator,
'formats': formats,
}

View File

@@ -11,6 +11,7 @@ from ..utils import (
strip_or_none,
float_or_none,
int_or_none,
merge_dicts,
parse_iso8601,
)
@@ -248,9 +249,13 @@ class VrtNUIE(GigyaBaseIE):
webpage, urlh = self._download_webpage_handle(url, display_id)
title = self._html_search_regex(
info = self._search_json_ld(webpage, display_id, default={})
# title is optional here since it may be extracted by extractor
# that is delegated from here
title = strip_or_none(self._html_search_regex(
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
webpage, 'title').strip()
webpage, 'title', default=None))
description = self._html_search_regex(
r'(?ms)<div class="content__description">(.+?)</div>',
@@ -295,7 +300,7 @@ class VrtNUIE(GigyaBaseIE):
# the first one
video_id = list(video.values())[0].get('videoid')
return {
return merge_dicts(info, {
'_type': 'url_transparent',
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
'ie_key': CanvasIE.ie_key(),
@@ -307,4 +312,4 @@ class VrtNUIE(GigyaBaseIE):
'season_number': season_number,
'episode_number': episode_number,
'release_date': release_date,
}
})

View File

@@ -17,9 +17,11 @@ from ..utils import (
xpath_element,
xpath_with_ns,
find_xpath_attr,
orderedSet,
parse_duration,
parse_iso8601,
parse_age_limit,
strip_or_none,
int_or_none,
ExtractorError,
)
@@ -129,15 +131,23 @@ class CBCIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
media_ids = []
for media_id_re in (
r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
r'<div[^>]+\bid=["\']player-(\d+)',
r'guid["\']\s*:\s*["\'](\d+)'):
media_ids.extend(re.findall(media_id_re, webpage))
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
for media_id in orderedSet(media_ids)])
return self.playlist_result(
entries, display_id,
self._og_search_title(webpage, fatal=False),
entries, display_id, strip_or_none(title),
self._og_search_description(webpage))

View File

@@ -4,13 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,
parse_duration,
parse_iso8601,
parse_resolution,
url_or_none,
)
@@ -53,8 +53,8 @@ class CCMAIE(InfoExtractor):
media_url = media['media']['url']
if isinstance(media_url, list):
for format_ in media_url:
format_url = format_.get('file')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(format_.get('file'))
if not format_url:
continue
label = format_.get('label')
f = parse_resolution(label)

View File

@@ -108,7 +108,7 @@ class CeskaTelevizeIE(InfoExtractor):
for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request(
'http://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded')

View File

@@ -31,7 +31,8 @@ class ChaturbateIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(
url, video_id, headers=self.geo_verification_headers())
m3u8_urls = []

View File

@@ -1,15 +1,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
parse_iso8601,
unified_timestamp,
)
class ClypIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
_TEST = {
_TESTS = [{
'url': 'https://clyp.it/ojz2wfah',
'md5': '1d4961036c41247ecfdcc439c0cddcbb',
'info_dict': {
@@ -21,13 +25,34 @@ class ClypIE(InfoExtractor):
'timestamp': 1443515251,
'upload_date': '20150929',
},
}
}, {
'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
'info_dict': {
'id': 'b04p1odi',
'ext': 'mp3',
'title': 'GJ! (Reward Edit)',
'description': 'Metal Resistance (THE ONE edition)',
'duration': 177.789,
'timestamp': 1528241278,
'upload_date': '20180605',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
audio_id = self._match_id(url)
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
token = qs.get('token', [None])[0]
query = {}
if token:
query['token'] = token
metadata = self._download_json(
'https://api.clyp.it/%s' % audio_id, audio_id)
'https://api.clyp.it/%s' % audio_id, audio_id, query=query)
formats = []
for secure in ('', 'Secure'):
@@ -45,7 +70,7 @@ class ClypIE(InfoExtractor):
title = metadata['Title']
description = metadata.get('Description')
duration = float_or_none(metadata.get('Duration'))
timestamp = parse_iso8601(metadata.get('DateCreated'))
timestamp = unified_timestamp(metadata.get('DateCreated'))
return {
'id': audio_id,

View File

@@ -19,6 +19,7 @@ from ..compat import (
compat_cookies,
compat_etree_fromstring,
compat_getpass,
compat_integer_types,
compat_http_client,
compat_os_name,
compat_str,
@@ -51,6 +52,7 @@ from ..utils import (
GeoUtils,
int_or_none,
js_to_json,
JSON_LD_RE,
mimetype2ext,
orderedSet,
parse_codecs,
@@ -548,8 +550,26 @@ class InfoExtractor(object):
def IE_NAME(self):
return compat_str(type(self).__name__[:-2])
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
""" Returns the response handle """
@staticmethod
def __can_accept_status_code(err, expected_status):
assert isinstance(err, compat_urllib_error.HTTPError)
if expected_status is None:
return False
if isinstance(expected_status, compat_integer_types):
return err.code == expected_status
elif isinstance(expected_status, (list, tuple)):
return err.code in expected_status
elif callable(expected_status):
return expected_status(err.code) is True
else:
assert False
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
"""
Return the response handle.
See _download_webpage docstring for arguments specification.
"""
if note is None:
self.report_download_webpage(video_id)
elif note is not False:
@@ -578,6 +598,10 @@ class InfoExtractor(object):
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
return err.fp
if errnote is False:
return False
if errnote is None:
@@ -590,13 +614,17 @@ class InfoExtractor(object):
self._downloader.report_warning(errmsg)
return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
""" Returns a tuple (page content as string, URL handle) """
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
"""
Return a tuple (page content as string, URL handle).
See _download_webpage docstring for arguments specification.
"""
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
if urlh is False:
assert not fatal
return False
@@ -685,13 +713,52 @@ class InfoExtractor(object):
return content
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
""" Returns the data of the page as a string """
def _download_webpage(
self, url_or_request, video_id, note=None, errnote=None,
fatal=True, tries=1, timeout=5, encoding=None, data=None,
headers={}, query={}, expected_status=None):
"""
Return the data of the page as a string.
Arguments:
url_or_request -- plain text URL as a string or
a compat_urllib_request.Requestobject
video_id -- Video/playlist/item identifier (string)
Keyword arguments:
note -- note printed before downloading (string)
errnote -- note printed in case of an error (string)
fatal -- flag denoting whether error should be considered fatal,
i.e. whether it should cause ExtractionError to be raised,
otherwise a warning will be reported and extraction continued
tries -- number of tries
timeout -- sleep interval between tries
encoding -- encoding for a page content decoding, guessed automatically
when not explicitly specified
data -- POST data (bytes)
headers -- HTTP headers (dict)
query -- URL query (dict)
expected_status -- allows to accept failed HTTP requests (non 2xx
status code) by explicitly specifying a set of accepted status
codes. Can be any of the following entities:
- an integer type specifying an exact failed status code to
accept
- a list or a tuple of integer types specifying a list of
failed status codes to accept
- a callable accepting an actual failed status code and
returning True if it should be accepted
Note that this argument does not affect success status codes (2xx)
which are always accepted.
"""
success = False
try_count = 0
while success is False:
try:
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
res = self._download_webpage_handle(
url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data, headers=headers, query=query,
expected_status=expected_status)
success = True
except compat_http_client.IncompleteRead as e:
try_count += 1
@@ -707,11 +774,17 @@ class InfoExtractor(object):
def _download_xml_handle(
self, url_or_request, video_id, note='Downloading XML',
errnote='Unable to download XML', transform_source=None,
fatal=True, encoding=None, data=None, headers={}, query={}):
"""Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)"""
fatal=True, encoding=None, data=None, headers={}, query={},
expected_status=None):
"""
Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
See _download_webpage docstring for arguments specification.
"""
res = self._download_webpage_handle(
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)
encoding=encoding, data=data, headers=headers, query=query,
expected_status=expected_status)
if res is False:
return res
xml_string, urlh = res
@@ -719,15 +792,21 @@ class InfoExtractor(object):
xml_string, video_id, transform_source=transform_source,
fatal=fatal), urlh
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
def _download_xml(
self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}, expected_status=None):
"""
Return the xml as an xml.etree.ElementTree.Element.
See _download_webpage docstring for arguments specification.
"""
res = self._download_xml_handle(
url_or_request, video_id, note=note, errnote=errnote,
transform_source=transform_source, fatal=fatal, encoding=encoding,
data=data, headers=headers, query=query)
data=data, headers=headers, query=query,
expected_status=expected_status)
return res if res is False else res[0]
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
@@ -745,11 +824,17 @@ class InfoExtractor(object):
def _download_json_handle(
self, url_or_request, video_id, note='Downloading JSON metadata',
errnote='Unable to download JSON metadata', transform_source=None,
fatal=True, encoding=None, data=None, headers={}, query={}):
"""Return a tuple (JSON object, URL handle)"""
fatal=True, encoding=None, data=None, headers={}, query={},
expected_status=None):
"""
Return a tuple (JSON object, URL handle).
See _download_webpage docstring for arguments specification.
"""
res = self._download_webpage_handle(
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)
encoding=encoding, data=data, headers=headers, query=query,
expected_status=expected_status)
if res is False:
return res
json_string, urlh = res
@@ -760,11 +845,18 @@ class InfoExtractor(object):
def _download_json(
self, url_or_request, video_id, note='Downloading JSON metadata',
errnote='Unable to download JSON metadata', transform_source=None,
fatal=True, encoding=None, data=None, headers={}, query={}):
fatal=True, encoding=None, data=None, headers={}, query={},
expected_status=None):
"""
Return the JSON object as a dict.
See _download_webpage docstring for arguments specification.
"""
res = self._download_json_handle(
url_or_request, video_id, note=note, errnote=errnote,
transform_source=transform_source, fatal=fatal, encoding=encoding,
data=data, headers=headers, query=query)
data=data, headers=headers, query=query,
expected_status=expected_status)
return res if res is False else res[0]
def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
@@ -1058,8 +1150,7 @@ class InfoExtractor(object):
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
json_ld = self._search_regex(
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
html, 'JSON-LD', group='json_ld', **kwargs)
JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
default = kwargs.get('default', NO_DEFAULT)
if not json_ld:
return default if default is not NO_DEFAULT else {}
@@ -1768,9 +1859,7 @@ class InfoExtractor(object):
'height': height,
})
formats.extend(m3u8_formats)
continue
if src_ext == 'f4m':
elif src_ext == 'f4m':
f4m_url = src_url
if not f4m_params:
f4m_params = {
@@ -1780,9 +1869,13 @@ class InfoExtractor(object):
f4m_url += '&' if '?' in f4m_url else '?'
f4m_url += compat_urllib_parse_urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
continue
if src_url.startswith('http') and self._is_valid_url(src, video_id):
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src_url, video_id, mpd_id='dash', fatal=False))
elif re.search(r'\.ism/[Mm]anifest', src_url):
formats.extend(self._extract_ism_formats(
src_url, video_id, ism_id='mss', fatal=False))
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
http_count += 1
formats.append({
'url': src_url,
@@ -1793,7 +1886,6 @@ class InfoExtractor(object):
'width': width,
'height': height,
})
continue
return formats
@@ -2015,7 +2107,21 @@ class InfoExtractor(object):
representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
def prepare_template(template_name, identifiers):
t = representation_ms_info[template_name]
tmpl = representation_ms_info[template_name]
# First of, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
# https://github.com/rg3/youtube-dl/issues/16867).
t = ''
in_template = False
for c in tmpl:
t += c
if c == '$':
in_template = not in_template
elif c == '%' and not in_template:
t += c
# Next, $...$ templates are translated to their
# %(...) counterparts to be used with % operator
t = t.replace('$RepresentationID$', representation_id)
t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
@@ -2346,6 +2452,8 @@ class InfoExtractor(object):
media_info['subtitles'].setdefault(lang, []).append({
'url': absolute_url(src),
})
for f in media_info['formats']:
f.setdefault('http_headers', {})['Referer'] = base_url
if media_info['formats'] or media_info['subtitles']:
entries.append(media_info)
return entries

View File

@@ -4,23 +4,21 @@ from __future__ import unicode_literals, division
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
parse_age_limit,
parse_duration,
url_or_none,
ExtractorError
)
class CrackleIE(InfoExtractor):
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = {
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TESTS = [{
# geo restricted to CA
'url': 'https://www.crackle.com/andromeda/2502343',
'info_dict': {
@@ -45,7 +43,10 @@ class CrackleIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
}
}, {
'url': 'https://www.sonycrackle.com/andromeda/2502343',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -83,8 +84,8 @@ class CrackleIE(InfoExtractor):
for e in media['MediaURLs']:
if e.get('UseDRM') is True:
continue
format_url = e.get('Path')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(e.get('Path'))
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
@@ -121,8 +122,8 @@ class CrackleIE(InfoExtractor):
for cc_file in cc_files:
if not isinstance(cc_file, dict):
continue
cc_url = cc_file.get('Path')
if not cc_url or not isinstance(cc_url, compat_str):
cc_url = url_or_none(cc_file.get('Path'))
if not cc_url:
continue
lang = cc_file.get('Locale') or 'en'
subtitles.setdefault(lang, []).append({'url': cc_url})

View File

@@ -8,6 +8,7 @@ import zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
from .vrv import VRVIE
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
@@ -18,6 +19,8 @@ from ..compat import (
from ..utils import (
ExtractorError,
bytes_to_intlist,
extract_attributes,
float_or_none,
intlist_to_bytes,
int_or_none,
lowercase_escape,
@@ -26,7 +29,6 @@ from ..utils import (
unified_strdate,
urlencode_postdata,
xpath_text,
extract_attributes,
)
from ..aes import (
aes_cbc_decrypt,
@@ -49,7 +51,7 @@ class CrunchyrollBaseIE(InfoExtractor):
})
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
@@ -139,7 +141,8 @@ class CrunchyrollBaseIE(InfoExtractor):
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
class CrunchyrollIE(CrunchyrollBaseIE):
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
IE_NAME = 'crunchyroll'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -148,7 +151,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'ext': 'mp4',
'title': 'Wanna be the Strongest in the World Episode 1 An Idol-Wrestler is Born!',
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
'upload_date': '20131013',
'url': 're:(?!.*&amp)',
@@ -221,7 +224,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'info_dict': {
'id': '535080',
'ext': 'mp4',
'title': '11eyes Episode 1 Piros éjszaka - Red Night',
'title': '11eyes Episode 1 Red Night ~ Piros éjszaka',
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
'uploader': 'Marvelous AQL Inc.',
'upload_date': '20091021',
@@ -262,6 +265,9 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# Just test metadata extraction
'skip_download': True,
},
}, {
'url': 'http://www.crunchyroll.com/media-723735',
'only_matching': True,
}]
_FORMAT_IDS = {
@@ -434,13 +440,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
if 'To view this, please log in to verify you are 18 or older.' in webpage:
self.raise_login_required()
media = self._parse_json(self._search_regex(
r'vilos\.config\.media\s*=\s*({.+?});',
webpage, 'vilos media', default='{}'), video_id)
media_metadata = media.get('metadata') or {}
video_title = self._html_search_regex(
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
webpage, 'video_title')
video_title = re.sub(r' {2,}', ' ', video_title)
video_description = self._parse_json(self._html_search_regex(
video_description = (self._parse_json(self._html_search_regex(
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
webpage, 'description', default='{}'), video_id).get('description')
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
if video_description:
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
video_upload_date = self._html_search_regex(
@@ -453,91 +464,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
webpage, 'video_uploader', fatal=False)
available_fmts = []
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
attrs = extract_attributes(a)
href = attrs.get('href')
if href and '/freetrial' in href:
continue
available_fmts.append(fmt)
if not available_fmts:
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
available_fmts = re.findall(p, webpage)
if available_fmts:
break
video_encode_ids = []
formats = []
for fmt in available_fmts:
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
stream_infos = []
streamdata = self._call_rpc_api(
'VideoPlayer_GetStandardConfig', video_id,
'Downloading media info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_quality': stream_quality,
'current_page': url,
})
if streamdata is not None:
stream_info = streamdata.find('./{default}preload/stream_info')
for stream in media.get('streams', []):
formats.extend(self._extract_vrv_formats(
stream.get('url'), video_id, stream.get('format'),
stream.get('audio_lang'), stream.get('hardsub_lang')))
if not formats:
available_fmts = []
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
attrs = extract_attributes(a)
href = attrs.get('href')
if href and '/freetrial' in href:
continue
available_fmts.append(fmt)
if not available_fmts:
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
available_fmts = re.findall(p, webpage)
if available_fmts:
break
if not available_fmts:
available_fmts = self._FORMAT_IDS.keys()
video_encode_ids = []
for fmt in available_fmts:
stream_quality, stream_format = self._FORMAT_IDS[fmt]
video_format = fmt + 'p'
stream_infos = []
streamdata = self._call_rpc_api(
'VideoPlayer_GetStandardConfig', video_id,
'Downloading media info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_quality': stream_quality,
'current_page': url,
})
if streamdata is not None:
stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None:
stream_infos.append(stream_info)
stream_info = self._call_rpc_api(
'VideoEncode_GetStreamInfo', video_id,
'Downloading stream info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
if stream_info is not None:
stream_infos.append(stream_info)
stream_info = self._call_rpc_api(
'VideoEncode_GetStreamInfo', video_id,
'Downloading stream info for %s' % video_format, data={
'media_id': video_id,
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
if stream_info is not None:
stream_infos.append(stream_info)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
if video_encode_id in video_encode_ids:
continue
video_encode_ids.append(video_encode_id)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
if video_encode_id in video_encode_ids:
continue
video_encode_ids.append(video_encode_id)
video_file = xpath_text(stream_info, './file')
if not video_file:
continue
if video_file.startswith('http'):
formats.extend(self._extract_m3u8_formats(
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
video_url = xpath_text(stream_info, './host')
if not video_url:
continue
metadata = stream_info.find('./metadata')
format_info = {
'format': video_format,
'height': int_or_none(xpath_text(metadata, './height')),
'width': int_or_none(xpath_text(metadata, './width')),
}
if '.fplive.net/' in video_url:
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
parsed_video_url = compat_urlparse.urlparse(video_url)
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
netloc='v.lvlt.crcdn.net',
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
if self._is_valid_url(direct_video_url, video_id, video_format):
format_info.update({
'format_id': 'http-' + video_format,
'url': direct_video_url,
})
formats.append(format_info)
video_file = xpath_text(stream_info, './file')
if not video_file:
continue
if video_file.startswith('http'):
formats.extend(self._extract_m3u8_formats(
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
format_info.update({
'format_id': 'rtmp-' + video_format,
'url': video_url,
'play_path': video_file,
'ext': 'flv',
})
formats.append(format_info)
video_url = xpath_text(stream_info, './host')
if not video_url:
continue
metadata = stream_info.find('./metadata')
format_info = {
'format': video_format,
'height': int_or_none(xpath_text(metadata, './height')),
'width': int_or_none(xpath_text(metadata, './width')),
}
if '.fplive.net/' in video_url:
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
parsed_video_url = compat_urlparse.urlparse(video_url)
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
netloc='v.lvlt.crcdn.net',
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
if self._is_valid_url(direct_video_url, video_id, video_format):
format_info.update({
'format_id': 'http-' + video_format,
'url': direct_video_url,
})
formats.append(format_info)
continue
format_info.update({
'format_id': 'rtmp-' + video_format,
'url': video_url,
'play_path': video_file,
'ext': 'flv',
})
formats.append(format_info)
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
metadata = self._call_rpc_api(
@@ -546,7 +565,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'media_id': video_id,
})
subtitles = self.extract_subtitles(video_id, webpage)
subtitles = {}
for subtitle in media.get('subtitles', []):
subtitle_url = subtitle.get('url')
if not subtitle_url:
continue
subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
'url': subtitle_url,
'ext': subtitle.get('format', 'ass'),
})
if not subtitles:
subtitles = self.extract_subtitles(video_id, webpage)
# webpage provide more accurate data than series_title from XML
series = self._html_search_regex(
@@ -554,8 +583,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@@ -565,7 +594,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'id': video_id,
'title': video_title,
'description': video_description,
'thumbnail': xpath_text(metadata, 'episode_image_url'),
'duration': float_or_none(media_metadata.get('duration'), 1000),
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
'uploader': video_uploader,
'upload_date': video_upload_date,
'series': series,
@@ -580,7 +610,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:playlist'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',

View File

@@ -11,10 +11,10 @@ class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
_TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '10deb320dc0ccb8d01d34d12fc2ea672',
'md5': '9b8624ba66351a23e0b6e1391971f9af',
'info_dict': {
'id': '901995',
'ext': 'mp4',
'ext': 'flv',
'title': 'Extended: \'That person cannot be me\' Johnson says',
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284,

View File

@@ -35,7 +35,7 @@ class CuriosityStreamBaseIE(InfoExtractor):
return result['data']
def _real_initialize(self):
(email, password) = self._get_login_info()
email, password = self._get_login_info()
if email is None:
return
result = self._download_json(

View File

@@ -4,7 +4,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_age_limit,
parse_iso8601,
smuggle_url,
str_or_none,
)
@@ -40,10 +43,15 @@ class CWTVIE(InfoExtractor):
'duration': 1263,
'series': 'Whose Line Is It Anyway?',
'season_number': 11,
'season': '11',
'episode_number': 20,
'upload_date': '20151006',
'timestamp': 1444107300,
'age_limit': 14,
'uploader': 'CWTV',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
@@ -58,60 +66,28 @@ class CWTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = None
formats = []
for partner in (154, 213):
vdata = self._download_json(
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
if not vdata:
continue
video_data = vdata
for quality, quality_data in vdata.get('videos', {}).items():
quality_url = quality_data.get('uri')
if not quality_url:
continue
if quality == 'variantplaylist':
formats.extend(self._extract_m3u8_formats(
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
tbr = int_or_none(quality_data.get('bitrate'))
format_id = 'http' + ('-%d' % tbr if tbr else '')
if self._is_valid_url(quality_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': quality_url,
'tbr': tbr,
})
video_metadata = video_data['assetFields']
ism_url = video_metadata.get('smoothStreamingUrl')
if ism_url:
formats.extend(self._extract_ism_formats(
ism_url, video_id, ism_id='mss', fatal=False))
self._sort_formats(formats)
video_data = self._download_json(
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
video_id)['video']
title = video_data['title']
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
thumbnails = [{
'url': image['uri'],
'width': image.get('width'),
'height': image.get('height'),
} for image_id, image in video_data['images'].items() if image.get('uri')] if video_data.get('images') else None
subtitles = {
'en': [{
'url': video_metadata['UnicornCcUrl'],
}],
} if video_metadata.get('UnicornCcUrl') else None
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))
if episode and season:
episode = episode.lstrip(season)
return {
'_type': 'url_transparent',
'id': video_id,
'title': video_metadata['title'],
'description': video_metadata.get('description'),
'duration': int_or_none(video_metadata.get('duration')),
'series': video_metadata.get('seriesName'),
'season_number': int_or_none(video_metadata.get('seasonNumber')),
'season': video_metadata.get('seasonName'),
'episode_number': int_or_none(video_metadata.get('episodeNumber')),
'timestamp': parse_iso8601(video_data.get('startTime')),
'thumbnails': thumbnails,
'formats': formats,
'subtitles': subtitles,
'title': title,
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
'description': video_data.get('description_long'),
'duration': int_or_none(video_data.get('duration_secs')),
'series': video_data.get('series_name'),
'season_number': int_or_none(season),
'episode_number': int_or_none(episode),
'timestamp': parse_iso8601(video_data.get('start_time')),
'age_limit': parse_age_limit(video_data.get('rating')),
'ie_key': 'ThePlatform',
}

View File

@@ -1,22 +1,29 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
import base64
import functools
import hashlib
import itertools
import json
import random
import re
import string
from .common import InfoExtractor
from ..compat import compat_struct_pack
from ..utils import (
determine_ext,
error_to_compat_str,
ExtractorError,
int_or_none,
mimetype2ext,
OnDemandPagedList,
parse_iso8601,
sanitized_Request,
str_to_int,
unescapeHTML,
mimetype2ext,
urlencode_postdata,
)
@@ -64,7 +71,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader': 'Deadline',
'uploader_id': 'x1xm8ri',
'age_limit': 0,
'view_count': int,
},
}, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@@ -141,7 +147,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
age_limit = self._rta_search(webpage)
description = self._og_search_description(webpage) or self._html_search_meta(
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'description', webpage, 'description')
view_count_str = self._search_regex(
@@ -167,6 +174,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
player = self._parse_json(player_v5, video_id)
metadata = player['metadata']
if metadata.get('error', {}).get('type') == 'password_protected':
password = self._downloader.params.get('videopassword')
if password:
r = int(metadata['id'][1:], 36)
us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
t = ''.join(random.choice(string.ascii_letters) for i in range(10))
n = us64e(compat_struct_pack('I', r))
i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
metadata = self._download_json(
'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
self._check_error(metadata)
formats = []
@@ -302,8 +320,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
def _check_error(self, info):
error = info.get('error')
if info.get('error') is not None:
title = error['title']
if error:
title = error.get('title') or error['message']
# See https://developer.dailymotion.com/api#access-error
if error.get('code') == 'DM007':
self.raise_geo_restricted(msg=title)
@@ -328,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
'id': 'xv4bw_nqtv_sport',
'id': 'xv4bw',
},
'playlist_mincount': 20,
}]
_PAGE_SIZE = 100
def _fetch_page(self, playlist_id, authorizaion, page):
page += 1
videos = self._download_json(
'https://graphql.api.dailymotion.com',
playlist_id, 'Downloading page %d' % page,
data=json.dumps({
'query': '''{
collection(xid: "%s") {
videos(first: %d, page: %d) {
pageInfo {
hasNextPage
nextPage
}
edges {
node {
xid
url
}
}
}
}
}''' % (playlist_id, self._PAGE_SIZE, page)
}).encode(), headers={
'Authorization': authorizaion,
'Origin': 'https://www.dailymotion.com',
})['data']['collection']['videos']
for edge in videos['edges']:
node = edge['node']
yield self.url_result(
node['url'], DailymotionIE.ie_key(), node['xid'])
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
api = self._parse_json(self._search_regex(
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
webpage, 'player config'), playlist_id)['context']['api']
auth = self._download_json(
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
playlist_id, data=urlencode_postdata({
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
'grant_type': 'client_credentials',
}))
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
entries = OnDemandPagedList(functools.partial(
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
return self.playlist_result(
entries, playlist_id,
self._og_search_title(webpage))
class DailymotionUserIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
'title': 'UnderProject',
},
'playlist_mincount': 1800,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time',
}]
def _extract_entries(self, id):
video_ids = set()
@@ -364,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
break
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
return {
'_type': 'playlist',
'id': playlist_id,
'title': self._og_search_title(webpage),
'entries': self._extract_entries(playlist_id),
}
class DailymotionUserIE(DailymotionPlaylistIE):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
'title': 'Rémi Gaillard',
},
'playlist_mincount': 100,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
'title': 'UnderProject',
},
'playlist_mincount': 1800,
'expected_warnings': [
'Stopped at duplicated page',
],
'skip': 'Takes too long time',
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
user = mobj.group('user')

View File

@@ -5,13 +5,16 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
unified_strdate,
int_or_none,
unified_timestamp,
url_or_none,
)
class DctpTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
_TESTS = [{
# 4x3
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
@@ -19,37 +22,55 @@ class DctpTvIE(InfoExtractor):
'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 71.24,
'timestamp': 1302172322,
'upload_date': '20110407',
},
'params': {
# rtmp download
'skip_download': True,
},
}
}, {
# 16x9
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
'only_matching': True,
}]
_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
version = self._download_json(
'%s/version.json' % self._BASE_URL, display_id,
'Downloading version JSON')
video_id = self._html_search_meta(
'DC.identifier', webpage, 'video id',
default=None) or self._search_regex(
r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
restapi_base = '%s/%s/restapi' % (
self._BASE_URL, version['version_name'])
title = self._og_search_title(webpage)
info = self._download_json(
'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
'Downloading video info JSON')
media = self._download_json(
'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
display_id, 'Downloading media JSON')
uuid = media['uuid']
title = media['title']
ratio = '16x9' if media.get('is_wide') else '4x3'
play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
servers = self._download_json(
'http://www.dctp.tv/streaming_servers/', display_id,
note='Downloading server list', fatal=False)
note='Downloading server list JSON', fatal=False)
if servers:
endpoint = next(
server['endpoint']
for server in servers
if isinstance(server.get('endpoint'), compat_str) and
if url_or_none(server.get('endpoint')) and
'cloudfront' in server['endpoint'])
else:
endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
@@ -60,27 +81,35 @@ class DctpTvIE(InfoExtractor):
formats = [{
'url': endpoint,
'app': app,
'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
'play_path': play_path,
'page_url': url,
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
'ext': 'flv',
}]
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
duration = float_or_none(self._search_regex(
r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
default=None), scale=1000)
thumbnails = []
images = media.get('images')
if isinstance(images, list):
for image in images:
if not isinstance(image, dict):
continue
image_url = url_or_none(image.get('url'))
if not image_url:
continue
thumbnails.append({
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
})
return {
'id': video_id,
'title': title,
'formats': formats,
'id': uuid,
'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
'duration': duration,
'title': title,
'alt_title': media.get('subtitle'),
'description': media.get('description') or media.get('teaser'),
'timestamp': unified_timestamp(media.get('created')),
'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
'thumbnails': thumbnails,
'formats': formats,
}

View File

@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
extract_attributes,
@@ -12,6 +11,7 @@ from ..utils import (
parse_age_limit,
remove_end,
unescapeHTML,
url_or_none,
)
@@ -69,9 +69,8 @@ class DiscoveryGoBaseIE(InfoExtractor):
captions = stream.get('captions')
if isinstance(captions, list):
for caption in captions:
subtitle_url = caption.get('fileUrl')
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
not subtitle_url.startswith('http')):
subtitle_url = url_or_none(caption.get('fileUrl'))
if not subtitle_url or not subtitle_url.startswith('http'):
continue
lang = caption.get('fileLang', 'en')
ext = determine_ext(subtitle_url)

View File

@@ -3,8 +3,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
from .dplay import DPlayIE
from ..compat import (
compat_parse_qs,
compat_urlparse,
@@ -12,8 +12,13 @@ from ..compat import (
from ..utils import smuggle_url
class DiscoveryNetworksDeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:discovery|tlc|animalplanet|dmax)\.de/(?:.*#(?P<id>\d+)|(?:[^/]+/)*videos/(?P<title>[^/?#]+))'
class DiscoveryNetworksDeIE(DPlayIE):
_VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
(?:
.*\#(?P<id>\d+)|
(?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
)'''
_TESTS = [{
'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
@@ -40,6 +45,14 @@ class DiscoveryNetworksDeIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
alternate_id = mobj.group('alternate_id')
if alternate_id:
self._initialize_geo_bypass({
'countries': ['DE'],
})
return self._get_disco_api_info(
url, '%s/%s' % (mobj.group('programme'), alternate_id),
'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
brightcove_id = mobj.group('id')
if not brightcove_id:
title = mobj.group('title')

View File

@@ -21,6 +21,7 @@ from ..utils import (
unified_strdate,
unified_timestamp,
update_url_query,
urljoin,
USER_AGENTS,
)
@@ -97,6 +98,75 @@ class DPlayIE(InfoExtractor):
'only_matching': True,
}]
def _get_disco_api_info(self, url, display_id, disco_host, realm):
disco_base = 'https://' + disco_host
token = self._download_json(
'%s/token' % disco_base, display_id, 'Downloading token',
query={
'realm': realm,
})['data']['attributes']['token']
headers = {
'Referer': url,
'Authorization': 'Bearer ' + token,
}
video = self._download_json(
'%s/content/videos/%s' % (disco_base, display_id), display_id,
headers=headers, query={
'include': 'show'
})
video_id = video['data']['id']
info = video['data']['attributes']
title = info['name']
formats = []
for format_id, format_dict in self._download_json(
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
display_id, headers=headers)['data']['attributes']['streaming'].items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url:
continue
ext = determine_ext(format_url)
if format_id == 'dash' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id='dash', fatal=False))
elif format_id == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
series = None
try:
included = video.get('included')
if isinstance(included, list):
show = next(e for e in included if e.get('type') == 'show')
series = try_get(
show, lambda x: x['attributes']['name'], compat_str)
except StopIteration:
pass
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': info.get('description'),
'duration': float_or_none(
info.get('videoDuration'), scale=1000),
'timestamp': unified_timestamp(info.get('publishStart')),
'series': series,
'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')),
'age_limit': int_or_none(info.get('minimum_age')),
'formats': formats,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
@@ -113,72 +183,8 @@ class DPlayIE(InfoExtractor):
if not video_id:
host = mobj.group('host')
disco_base = 'https://disco-api.%s' % host
self._download_json(
'%s/token' % disco_base, display_id, 'Downloading token',
query={
'realm': host.replace('.', ''),
})
video = self._download_json(
'%s/content/videos/%s' % (disco_base, display_id), display_id,
headers={
'Referer': url,
'x-disco-client': 'WEB:UNKNOWN:dplay-client:0.0.1',
}, query={
'include': 'show'
})
video_id = video['data']['id']
info = video['data']['attributes']
title = info['name']
formats = []
for format_id, format_dict in self._download_json(
'%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
display_id)['data']['attributes']['streaming'].items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url:
continue
ext = determine_ext(format_url)
if format_id == 'dash' or ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, display_id, mpd_id='dash', fatal=False))
elif format_id == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls',
fatal=False))
else:
formats.append({
'url': format_url,
'format_id': format_id,
})
self._sort_formats(formats)
series = None
try:
included = video.get('included')
if isinstance(included, list):
show = next(e for e in included if e.get('type') == 'show')
series = try_get(
show, lambda x: x['attributes']['name'], compat_str)
except StopIteration:
pass
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': info.get('description'),
'duration': float_or_none(
info.get('videoDuration'), scale=1000),
'timestamp': unified_timestamp(info.get('publishStart')),
'series': series,
'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')),
'age_limit': int_or_none(info.get('minimum_age')),
'formats': formats,
}
return self._get_disco_api_info(
url, display_id, 'disco-api.' + host, host.replace('.', ''))
info = self._download_json(
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
@@ -305,9 +311,11 @@ class DPlayItIE(InfoExtractor):
if not info:
info_url = self._search_regex(
r'url\s*[:=]\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
webpage, 'info url')
(r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
webpage, 'info url', group='url')
info_url = urljoin(url, info_url)
video_id = info_url.rpartition('/')[-1]
try:
@@ -317,6 +325,8 @@ class DPlayItIE(InfoExtractor):
'dplayit_token').value,
'Referer': url,
})
if isinstance(info, compat_str):
info = self._parse_json(info, display_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
@@ -332,6 +342,7 @@ class DPlayItIE(InfoExtractor):
formats = self._extract_m3u8_formats(
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
series = self._html_search_regex(
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',

View File

@@ -7,7 +7,6 @@ import json
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urlparse,
)
from ..utils import (
@@ -17,6 +16,7 @@ from ..utils import (
parse_age_limit,
parse_duration,
unified_timestamp,
url_or_none,
)
@@ -42,7 +42,7 @@ class DramaFeverBaseIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
@@ -139,8 +139,8 @@ class DramaFeverIE(DramaFeverBaseIE):
for sub in subs:
if not isinstance(sub, dict):
continue
sub_url = sub.get('url')
if not sub_url or not isinstance(sub_url, compat_str):
sub_url = url_or_none(sub.get('url'))
if not sub_url:
continue
subtitles.setdefault(
sub.get('code') or sub.get('language') or 'en', []).append({
@@ -163,8 +163,8 @@ class DramaFeverIE(DramaFeverBaseIE):
for format_id, format_dict in download_assets.items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(format_dict.get('url'))
if not format_url:
continue
formats.append({
'url': format_url,

View File

@@ -4,14 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
unsmuggle_url,
url_or_none,
)
@@ -177,7 +175,7 @@ class EaglePlatformIE(InfoExtractor):
video_id, 'Downloading mp4 JSON', fatal=False)
if mp4_data:
for format_id, format_url in mp4_data.get('data', {}).items():
if not isinstance(format_url, compat_str):
if not url_or_none(format_url):
continue
height = int_or_none(format_id)
if height is not None and m3u8_formats_dict.get(height):

View File

@@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
try_get,
unified_timestamp,
url_or_none,
)
@@ -34,8 +35,8 @@ class EggheadCourseIE(InfoExtractor):
entries = []
for lesson in lessons:
lesson_url = lesson.get('http_url')
if not lesson_url or not isinstance(lesson_url, compat_str):
lesson_url = url_or_none(lesson.get('http_url'))
if not lesson_url:
continue
lesson_id = lesson.get('id')
if lesson_id:
@@ -95,7 +96,8 @@ class EggheadLessonIE(InfoExtractor):
formats = []
for _, format_url in lesson['media_urls'].items():
if not format_url or not isinstance(format_url, compat_str):
format_url = url_or_none(format_url)
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'm3u8':

View File

@@ -11,6 +11,7 @@ from ..utils import (
int_or_none,
parse_duration,
str_to_int,
url_or_none,
)
@@ -82,8 +83,8 @@ class EpornerIE(InfoExtractor):
for format_id, format_dict in formats_dict.items():
if not isinstance(format_dict, dict):
continue
src = format_dict.get('src')
if not isinstance(src, compat_str) or not src.startswith('http'):
src = url_or_none(format_dict.get('src'))
if not src or not src.startswith('http'):
continue
if kind == 'hls':
formats.extend(self._extract_m3u8_formats(

View File

@@ -0,0 +1,98 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
unescapeHTML,
unified_timestamp,
)
class ExpressenIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?expressen\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
_TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
'md5': '2fbbe3ca14392a6b1b36941858d33a45',
'info_dict': {
'id': '8690962',
'ext': 'mp4',
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 788,
'timestamp': 1526639109,
'upload_date': '20180518',
},
}, {
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
webpage)]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
def extract_data(name):
return self._parse_json(
self._search_regex(
r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
webpage, 'info', group='value'),
display_id, transform_source=unescapeHTML)
info = extract_data('video-tracking-info')
video_id = info['videoId']
data = extract_data('article-data')
stream = data['stream']
if determine_ext(stream) == 'm3u8':
formats = self._extract_m3u8_formats(
stream, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
else:
formats = [{
'url': stream,
}]
self._sort_formats(formats)
title = info.get('titleRaw') or data['title']
description = info.get('descriptionRaw')
thumbnail = info.get('socialMediaImage') or data.get('image')
duration = int_or_none(info.get('videoTotalSecondsDuration') or
data.get('totalSecondsDuration'))
timestamp = unified_timestamp(info.get('publishDate'))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
}

View File

@@ -44,6 +44,7 @@ from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
from .apa import APAIE
from .aparat import AparatIE
from .appleconnect import AppleConnectIE
from .appletrailers import (
@@ -53,6 +54,7 @@ from .appletrailers import (
from .archiveorg import ArchiveOrgIE
from .arkena import ArkenaIE
from .ard import (
ARDBetaMediathekIE,
ARDIE,
ARDMediathekIE,
)
@@ -117,6 +119,10 @@ from .bilibili import (
BiliBiliBangumiIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
BitChuteIE,
BitChuteChannelIE,
)
from .biqle import BIQLEIE
from .bleacherreport import (
BleacherReportIE,
@@ -145,6 +151,8 @@ from .camdemy import (
CamdemyIE,
CamdemyFolderIE
)
from .cammodels import CamModelsIE
from .camtube import CamTubeIE
from .camwithher import CamWithHerIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
@@ -332,6 +340,7 @@ from .esri import EsriVideoIE
from .europa import EuropaIE
from .everyonesmixtape import EveryonesMixtapeIE
from .expotv import ExpoTVIE
from .expressen import ExpressenIE
from .extremetube import ExtremeTubeIE
from .eyedotv import EyedoTVIE
from .facebook import (
@@ -369,7 +378,6 @@ from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
FoxNewsArticleIE,
FoxNewsInsiderIE,
)
from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE
@@ -379,6 +387,7 @@ from .francetv import (
FranceTVSiteIE,
FranceTVEmbedIE,
FranceTVInfoIE,
FranceTVInfoSportIE,
FranceTVJeunesseIE,
GenerationWhatIE,
CultureboxIE,
@@ -386,6 +395,11 @@ from .francetv import (
from .freesound import FreesoundIE
from .freespeech import FreespeechIE
from .freshlive import FreshLiveIE
from .frontendmasters import (
FrontendMastersIE,
FrontendMastersLessonIE,
FrontendMastersCourseIE
)
from .funimation import FunimationIE
from .funk import (
FunkMixIE,
@@ -507,6 +521,7 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
from .kinopoisk import KinoPoiskIE
from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE
@@ -585,6 +600,10 @@ from .mangomolo import (
MangomoloLiveIE,
)
from .manyvids import ManyVidsIE
from .markiza import (
MarkizaIE,
MarkizaPageIE,
)
from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
@@ -703,12 +722,7 @@ from .nexx import (
from .nfb import NFBIE
from .nfl import NFLIE
from .nhk import NhkVodIE
from .nhl import (
NHLVideocenterIE,
NHLNewsIE,
NHLVideocenterCategoryIE,
NHLIE,
)
from .nhl import NHLIE
from .nick import (
NickIE,
NickBrIE,
@@ -717,10 +731,7 @@ from .nick import (
NickRuIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninecninemedia import (
NineCNineMediaStackIE,
NineCNineMediaIE,
)
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
@@ -731,7 +742,10 @@ from .nonktube import NonkTubeIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nova import NovaIE
from .nova import (
NovaEmbedIE,
NovaIE,
)
from .novamov import (
AuroraVidIE,
CloudTimeIE,
@@ -763,7 +777,9 @@ from .nrk import (
NRKSkoleIE,
NRKTVIE,
NRKTVDirekteIE,
NRKTVEpisodeIE,
NRKTVEpisodesIE,
NRKTVSeasonIE,
NRKTVSeriesIE,
)
from .ntvde import NTVDeIE
@@ -853,6 +869,10 @@ from .pornhub import (
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
)
from .presstv import PressTVIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
@@ -884,7 +904,10 @@ from .rai import (
RaiPlayPlaylistIE,
RaiIE,
)
from .raywenderlich import RayWenderlichIE
from .raywenderlich import (
RayWenderlichIE,
RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
from .redbulltv import RedBullTVIE
@@ -1040,6 +1063,7 @@ from .stretchinternet import StretchInternetIE
from .sunporno import SunPornoIE
from .svt import (
SVTIE,
SVTPageIE,
SVTPlayIE,
SVTSeriesIE,
)
@@ -1143,6 +1167,7 @@ from .tvc import (
from .tvigle import TvigleIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
@@ -1157,6 +1182,7 @@ from .tvp import (
from .tvplay import (
TVPlayIE,
ViafreeIE,
TVPlayHomeIE,
)
from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
@@ -1278,6 +1304,7 @@ from .viki import (
VikiIE,
VikiChannelIE,
)
from .viqeo import ViqeoIE
from .viu import (
ViuIE,
ViuPlaylistIE,
@@ -1403,6 +1430,7 @@ from .younow import (
YouNowMomentIE,
)
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,

View File

@@ -20,6 +20,7 @@ from ..utils import (
int_or_none,
js_to_json,
limit_length,
parse_count,
sanitized_Request,
try_get,
urlencode_postdata,
@@ -56,6 +57,7 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
_TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@@ -74,7 +76,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '274175099429670',
'ext': 'mp4',
'title': 'Asif Nawab Butt posted a video to his Timeline.',
'title': 're:^Asif Nawab Butt posted a video',
'uploader': 'Asif Nawab Butt',
'upload_date': '20140506',
'timestamp': 1399398998,
@@ -132,7 +134,7 @@ class FacebookIE(InfoExtractor):
}, {
# have 1080P, but only up to 720p in swf params
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '0d9813160b146b3bc8744e006027fcc6',
'md5': '9571fae53d4165bbbadb17a94651dcdc',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
@@ -141,6 +143,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20161030',
'uploader': 'CNN',
'thumbnail': r're:^https?://.*',
'view_count': int,
},
}, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@@ -148,7 +151,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '1417995061575415',
'ext': 'mp4',
'title': 'md5:a7b86ca673f51800cd54687b7f4012fe',
'title': 'md5:1db063d6a8c13faa8da727817339c857',
'timestamp': 1486648217,
'upload_date': '20170209',
'uploader': 'Yaroslav Korpan',
@@ -175,7 +178,7 @@ class FacebookIE(InfoExtractor):
'info_dict': {
'id': '1396382447100162',
'ext': 'mp4',
'title': 'md5:e2d2700afdf84e121f5d0f999bad13a3',
'title': 'md5:19a428bbde91364e3de815383b54a235',
'timestamp': 1486035494,
'upload_date': '20170202',
'uploader': 'Elisabeth Ahtn',
@@ -208,6 +211,17 @@ class FacebookIE(InfoExtractor):
# no title
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
'only_matching': True,
}, {
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
'info_dict': {
'id': '359649331226507',
'ext': 'mp4',
'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
'uploader': 'ESL One Dota 2',
},
'params': {
'skip_download': True,
},
}]
@staticmethod
@@ -226,7 +240,7 @@ class FacebookIE(InfoExtractor):
return urls
def _login(self):
(useremail, password) = self._get_login_info()
useremail, password = self._get_login_info()
if useremail is None:
return
@@ -312,16 +326,18 @@ class FacebookIE(InfoExtractor):
if server_js_data:
video_data = extract_video_data(server_js_data.get('instances', []))
def extract_from_jsmods_instances(js_data):
if js_data:
return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or [])
if not video_data:
server_js_data = self._parse_json(
self._search_regex(
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if server_js_data:
video_data = extract_video_data(try_get(
server_js_data, lambda x: x['jsmods']['instances'],
list) or [])
video_data = extract_from_jsmods_instances(server_js_data)
if not video_data:
if not fatal_if_no_video:
@@ -333,8 +349,35 @@ class FacebookIE(InfoExtractor):
expected=True)
elif '>You must log in to continue' in webpage:
self.raise_login_required()
else:
raise ExtractorError('Cannot parse data')
# Video info not in first request, do a secondary request using
# tahoe player specific URL
tahoe_data = self._download_webpage(
self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
data=urlencode_postdata({
'__a': 1,
'__pc': self._search_regex(
r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
'pkg cohort', default='PHASED:DEFAULT'),
'__rev': self._search_regex(
r'client_revision["\']\s*:\s*(\d+),', webpage,
'client revision', default='3944515'),
'fb_dtsg': self._search_regex(
r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
webpage, 'dtsg token', default=''),
}),
headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
tahoe_js_data = self._parse_json(
self._search_regex(
r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
'tahoe js data', default='{}'),
video_id, fatal=False)
video_data = extract_from_jsmods_instances(tahoe_js_data)
if not video_data:
raise ExtractorError('Cannot parse data')
formats = []
for f in video_data:
@@ -380,12 +423,17 @@ class FacebookIE(InfoExtractor):
video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id(
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', fatal=False)
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
fatal=False) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
thumbnail = self._og_search_thumbnail(webpage)
view_count = parse_count(self._search_regex(
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
default=None))
info_dict = {
'id': video_id,
'title': video_title,
@@ -393,6 +441,7 @@ class FacebookIE(InfoExtractor):
'uploader': uploader,
'timestamp': timestamp,
'thumbnail': thumbnail,
'view_count': view_count,
}
return webpage, info_dict

View File

@@ -46,7 +46,7 @@ class FC2IE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None or password is None:
return False

View File

@@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
qualities,
unified_strdate,
url_or_none,
)
@@ -88,8 +89,8 @@ class FirstTVIE(InfoExtractor):
formats = []
path = None
for f in item.get('mbr', []):
src = f.get('src')
if not src or not isinstance(src, compat_str):
src = url_or_none(f.get('src'))
if not src:
continue
tbr = int_or_none(self._search_regex(
r'_(\d{3,})\.mp4', src, 'tbr', default=None))

View File

@@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
},
]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
webpage)]
def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups()
@@ -68,21 +76,41 @@ class FoxNewsIE(AMPIE):
class FoxNewsArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
_VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
IE_NAME = 'foxnews:article'
_TEST = {
_TESTS = [{
# data-video-id
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': '62aa5a781b308fdee212ebb6f33ae7ef',
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
'info_dict': {
'id': '5116295019001',
'ext': 'mp4',
'title': 'Trump and Clinton asked to defend positions on Iraq War',
'description': 'Veterans react on \'The Kelly File\'',
'timestamp': 1473299755,
'timestamp': 1473301045,
'upload_date': '20160908',
},
}
}, {
# iframe embed
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'info_dict': {
'id': '5748266721001',
'ext': 'flv',
'title': 'Kyle Kashuv has a positive message for the Trump White House',
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 229,
'timestamp': 1520594670,
'upload_date': '20180309',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -90,51 +118,10 @@ class FoxNewsArticleIE(InfoExtractor):
video_id = self._html_search_regex(
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
webpage, 'video ID', group='id')
webpage, 'video ID', group='id', default=None)
if video_id:
return self.url_result(
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
return self.url_result(
'http://video.foxnews.com/v/' + video_id,
FoxNewsIE.ie_key())
class FoxNewsInsiderIE(InfoExtractor):
_VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P<id>[a-z-]+)'
IE_NAME = 'foxnews:insider'
_TEST = {
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
'md5': 'a10c755e582d28120c62749b4feb4c0c',
'info_dict': {
'id': '5099377331001',
'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words',
'ext': 'mp4',
'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive',
'description': 'Is campus censorship getting out of control?',
'timestamp': 1472168725,
'upload_date': '20160825',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': [FoxNewsIE.ie_key()],
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL')
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
return {
'_type': 'url_transparent',
'ie_key': FoxNewsIE.ie_key(),
'url': embed_url,
'display_id': display_id,
'title': title,
'description': description,
}
FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())

View File

@@ -16,6 +16,7 @@ from ..utils import (
int_or_none,
parse_duration,
try_get,
url_or_none,
)
from .dailymotion import DailymotionIE
@@ -115,14 +116,13 @@ class FranceTVIE(InfoExtractor):
def sign(manifest_url, manifest_id):
for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
signed_url = self._download_webpage(
signed_url = url_or_none(self._download_webpage(
'https://%s/esi/TA' % host, video_id,
'Downloading signed %s manifest URL' % manifest_id,
fatal=False, query={
'url': manifest_url,
})
if (signed_url and isinstance(signed_url, compat_str) and
re.search(r'^(?:https?:)?//', signed_url)):
}))
if signed_url:
return signed_url
return manifest_url
@@ -379,6 +379,31 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
return self._make_url_result(video_id, catalogue)
class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
IE_NAME = 'sport.francetvinfo.fr'
_VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
'info_dict': {
'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
'ext': 'mp4',
'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
'timestamp': 1523639962,
'upload_date': '20180413',
},
'params': {
'skip_download': True,
},
'add_ie': [FranceTVIE.ie_key()],
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
return self._make_url_result(video_id, 'Sport-web')
class GenerationWhatIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-what'
_VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'

View File

@@ -0,0 +1,263 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
ExtractorError,
parse_duration,
url_or_none,
urlencode_postdata,
)
class FrontendMastersBaseIE(InfoExtractor):
_API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
_LOGIN_URL = 'https://frontendmasters.com/login/'
_NETRC_MACHINE = 'frontendmasters'
_QUALITIES = {
'low': {'width': 480, 'height': 360},
'mid': {'width': 1280, 'height': 720},
'high': {'width': 1920, 'height': 1080}
}
def _real_initialize(self):
self._login()
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
login_form = self._hidden_inputs(login_page)
login_form.update({
'username': username,
'password': password
})
post_url = self._search_regex(
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
'post_url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'})
# Successful login
if any(p in response for p in (
'wp-login.php?action=logout', '>Logout')):
return
error = self._html_search_regex(
r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
response, 'error message', default=None, group='error')
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
def _download_course(self, course_name, url):
return self._download_json(
'%s/courses/%s' % (self._API_BASE, course_name), course_name,
'Downloading course JSON', headers={'Referer': url})
@staticmethod
def _extract_chapters(course):
chapters = []
lesson_elements = course.get('lessonElements')
if isinstance(lesson_elements, list):
chapters = [url_or_none(e) for e in lesson_elements if url_or_none(e)]
return chapters
@staticmethod
def _extract_lesson(chapters, lesson_id, lesson):
title = lesson.get('title') or lesson_id
display_id = lesson.get('slug')
description = lesson.get('description')
thumbnail = lesson.get('thumbnail')
chapter_number = None
index = lesson.get('index')
element_index = lesson.get('elementIndex')
if (isinstance(index, int) and isinstance(element_index, int) and
index < element_index):
chapter_number = element_index - index
chapter = (chapters[chapter_number - 1]
if chapter_number - 1 < len(chapters) else None)
duration = None
timestamp = lesson.get('timestamp')
if isinstance(timestamp, compat_str):
mobj = re.search(
r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
timestamp)
if mobj:
duration = parse_duration(mobj.group('end')) - parse_duration(
mobj.group('start'))
return {
'_type': 'url_transparent',
'url': 'frontendmasters:%s' % lesson_id,
'ie_key': FrontendMastersIE.ie_key(),
'id': lesson_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'chapter': chapter,
'chapter_number': chapter_number,
}
class FrontendMastersIE(FrontendMastersBaseIE):
_VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
'md5': '7f161159710d6b7016a4f4af6fcb05e2',
'info_dict': {
'id': 'a2qogef6ba',
'ext': 'mp4',
'title': 'a2qogef6ba',
},
'skip': 'Requires FrontendMasters account credentials',
}, {
'url': 'frontendmasters:a2qogef6ba',
'only_matching': True,
}]
def _real_extract(self, url):
lesson_id = self._match_id(url)
source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id)
formats = []
for ext in ('webm', 'mp4'):
for quality in ('low', 'mid', 'high'):
resolution = self._QUALITIES[quality].copy()
format_id = '%s-%s' % (ext, quality)
format_url = self._download_json(
source_url, lesson_id,
'Downloading %s source JSON' % format_id, query={
'f': ext,
'r': resolution['height'],
}, headers={
'Referer': url,
}, fatal=False)['url']
if not format_url:
continue
f = resolution.copy()
f.update({
'url': format_url,
'ext': ext,
'format_id': format_id,
})
formats.append(f)
self._sort_formats(formats)
subtitles = {
'en': [{
'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id),
}]
}
return {
'id': lesson_id,
'title': lesson_id,
'formats': formats,
'subtitles': subtitles
}
class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
_TEST = {
'url': 'https://frontendmasters.com/courses/web-development/tools',
'info_dict': {
'id': 'a2qogef6ba',
'display_id': 'tools',
'ext': 'mp4',
'title': 'Tools',
'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
'thumbnail': r're:^https?://.*\.jpg$',
'chapter': 'Introduction',
'chapter_number': 1,
},
'params': {
'skip_download': True,
},
'skip': 'Requires FrontendMasters account credentials',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
course_name, lesson_name = mobj.group('course_name', 'lesson_name')
course = self._download_course(course_name, url)
lesson_id, lesson = next(
(video_id, data)
for video_id, data in course['lessonData'].items()
if data.get('slug') == lesson_name)
chapters = self._extract_chapters(course)
return self._extract_lesson(chapters, lesson_id, lesson)
class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
_VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
_TEST = {
'url': 'https://frontendmasters.com/courses/web-development/',
'info_dict': {
'id': 'web-development',
'title': 'Introduction to Web Development',
'description': 'md5:9317e6e842098bf725d62360e52d49a6',
},
'playlist_count': 81,
'skip': 'Requires FrontendMasters account credentials',
}
@classmethod
def suitable(cls, url):
return False if FrontendMastersLessonIE.suitable(url) else super(
FrontendMastersBaseIE, cls).suitable(url)
def _real_extract(self, url):
course_name = self._match_id(url)
course = self._download_course(course_name, url)
chapters = self._extract_chapters(course)
lessons = sorted(
course['lessonData'].values(), key=lambda data: data['index'])
entries = []
for lesson in lessons:
lesson_name = lesson.get('slug')
if not lesson_name:
continue
lesson_id = lesson.get('hash') or lesson.get('statsId')
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
title = course.get('title')
description = course.get('description')
return self.playlist_result(entries, course_name, title, description)

View File

@@ -51,7 +51,7 @@ class FunimationIE(InfoExtractor):
}]
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return
try:

View File

@@ -1,10 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_str
from ..utils import (
int_or_none,
try_get,
@@ -12,6 +14,19 @@ from ..utils import (
class FunkBaseIE(InfoExtractor):
_HEADERS = {
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
}
_AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
@staticmethod
def _make_headers(referer):
headers = FunkBaseIE._HEADERS.copy()
headers['Referer'] = referer
return headers
def _make_url_result(self, video):
return {
'_type': 'url_transparent',
@@ -48,19 +63,19 @@ class FunkMixIE(FunkBaseIE):
lists = self._download_json(
'https://www.funk.net/api/v3.1/curation/curatedLists/',
mix_id, headers={
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
'Referer': url,
}, query={
mix_id, headers=self._make_headers(url), query={
'size': 100,
})['result']['lists']
})['_embedded']['curatedListList']
metas = next(
l for l in lists
if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
video = next(
meta['videoDataDelegate']
for meta in metas if meta.get('alias') == alias)
for meta in metas
if try_get(
meta, lambda x: x['videoDataDelegate']['alias'],
compat_str) == alias)
return self._make_url_result(video)
@@ -104,25 +119,53 @@ class FunkChannelIE(FunkBaseIE):
channel_id = mobj.group('id')
alias = mobj.group('alias')
headers = {
'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
'Referer': url,
}
headers = self._make_headers(url)
video = None
by_id_list = self._download_json(
'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
headers=headers, query={
'ids': alias,
}, fatal=False)
if by_id_list:
video = try_get(by_id_list, lambda x: x['result'][0], dict)
# Id-based channels are currently broken on their side: webplayer
# tries to process them via byChannelAlias endpoint and fails
# predictably.
for page_num in itertools.count():
by_channel_alias = self._download_json(
'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
% channel_id,
'Downloading byChannelAlias JSON page %d' % (page_num + 1),
headers=headers, query={
'filterFsk': 'false',
'sort': 'creationDate,desc',
'size': 100,
'page': page_num,
}, fatal=False)
if not by_channel_alias:
break
video_list = try_get(
by_channel_alias, lambda x: x['_embedded']['videoList'], list)
if not video_list:
break
try:
video = next(r for r in video_list if r.get('alias') == alias)
break
except StopIteration:
pass
if not try_get(
by_channel_alias, lambda x: x['_links']['next']):
break
if not video:
by_id_list = self._download_json(
'https://www.funk.net/api/v3.0/content/videos/byIdList',
channel_id, 'Downloading byIdList JSON', headers=headers,
query={
'ids': alias,
}, fatal=False)
if by_id_list:
video = try_get(by_id_list, lambda x: x['result'][0], dict)
if not video:
results = self._download_json(
'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
headers=headers, query={
'https://www.funk.net/api/v3.0/content/videos/filter',
channel_id, 'Downloading filter JSON', headers=headers, query={
'channelId': channel_id,
'size': 100,
})['result']

View File

@@ -91,7 +91,7 @@ class GDCVaultIE(InfoExtractor):
]
def _login(self, webpage_url, display_id):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None or password is None:
self.report_warning('It looks like ' + webpage_url + ' requires a login. Try specifying a username and password and try again.')
return None

View File

@@ -110,6 +110,10 @@ from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE
from .expressen import ExpressenIE
class GenericIE(InfoExtractor):
@@ -1393,17 +1397,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
# SVT embed
{
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
'info_dict': {
'id': '2900353',
'ext': 'flv',
'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
'duration': 27,
'age_limit': 0,
},
},
# Crooks and Liars embed
{
'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -2041,6 +2034,23 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
{
# APA embed via JWPlatform embed
'url': 'http://www.vol.at/blue-man-group/5593454',
'info_dict': {
'id': 'jjv85FdZ',
'ext': 'mp4',
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 254,
'timestamp': 1519211149,
'upload_date': '20180221',
},
'params': {
'skip_download': True,
},
},
{
'url': 'http://share-videos.se/auto/video/83645793?uid=13',
'md5': 'b68d276de422ab07ee1d49388103f457',
@@ -2051,6 +2061,30 @@ class GenericIE(InfoExtractor):
},
'skip': 'TODO: fix nested playlists processing in tests',
},
{
# Viqeo embeds
'url': 'https://viqeo.tv/',
'info_dict': {
'id': 'viqeo',
'title': 'All-new video platform',
},
'playlist_count': 6,
},
{
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
'info_dict': {
'id': 'shell',
'ext': 'mp4',
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to download MPD manifest'],
},
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -3058,7 +3092,7 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
peertube_urls = PeerTubeIE._extract_urls(webpage)
peertube_urls = PeerTubeIE._extract_urls(webpage, url)
if peertube_urls:
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
@@ -3068,6 +3102,16 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
apa_urls = APAIE._extract_urls(webpage)
if apa_urls:
return self.playlist_from_matches(
apa_urls, video_id, video_title, ie=APAIE.ie_key())
foxnews_urls = FoxNewsIE._extract_urls(webpage)
if foxnews_urls:
return self.playlist_from_matches(
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)]
@@ -3075,6 +3119,16 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
sharevideos_urls, video_id, video_title)
viqeo_urls = ViqeoIE._extract_urls(webpage)
if viqeo_urls:
return self.playlist_from_matches(
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
expressen_urls = ExpressenIE._extract_urls(webpage)
if expressen_urls:
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:

View File

@@ -23,7 +23,6 @@ from ..utils import (
class GloboIE(InfoExtractor):
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
_LOGGED_IN = False
_NETRC_MACHINE = 'globo'
_TESTS = [{
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
@@ -68,9 +67,6 @@ class GloboIE(InfoExtractor):
}]
def _real_initialize(self):
if self._LOGGED_IN:
return
email, password = self._get_login_info()
if email is None:
return
@@ -91,7 +87,6 @@ class GloboIE(InfoExtractor):
resp = self._parse_json(e.cause.read(), None)
raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
raise
self._LOGGED_IN = True
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
'requestor_id': 'DisneyXD',
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
_TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': {
@@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
}, {
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
'only_matching': True,
}, {
# brand 004
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
'only_matching': True,
}, {
# brand 008
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
}]
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
def _real_extract(self, url):
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
site_info = self._SITE_INFO[sub_domain]
brand = site_info['brand']
if not video_id:
webpage = self._download_webpage(url, display_id)
site_info = self._SITE_INFO.get(sub_domain, {})
brand = site_info.get('brand')
if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
default=None)
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
default='004')
site_info = next(
si for _, si in self._SITE_INFO.items()
if si.get('brand') == brand)
if not video_id:
# show extraction works for Disney, DisneyJunior and DisneyXD
# ABC and Freeform has different layout

View File

@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
ExtractorError,
@@ -14,8 +15,8 @@ from ..utils import (
class Go90IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?go90\.com/videos/(?P<id>[0-9a-zA-Z]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'https://www.go90.com/videos/84BUqjLpf9D',
'md5': 'efa7670dbbbf21a7b07b360652b24a32',
'info_dict': {
@@ -27,15 +28,31 @@ class Go90IE(InfoExtractor):
'upload_date': '20170411',
'age_limit': 14,
}
}
}, {
'url': 'https://www.go90.com/embed/261MflWkD3N',
'only_matching': True,
}]
_GEO_BYPASS = False
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://www.go90.com/api/view/items/' + video_id,
video_id, headers={
try:
headers = self.geo_verification_headers()
headers.update({
'Content-Type': 'application/json; charset=utf-8',
}, data=b'{"client":"web","device_type":"pc"}')
})
video_data = self._download_json(
'https://www.go90.com/api/view/items/' + video_id, video_id,
headers=headers, data=b'{"client":"web","device_type":"pc"}')
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
message = self._parse_json(e.cause.read().decode(), None)['error']['message']
if 'region unavailable' in message:
self.raise_geo_restricted(countries=['US'])
raise ExtractorError(message, expected=True)
raise
if video_data.get('requires_drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
main_video_asset = video_data['main_video_asset']

View File

@@ -8,6 +8,7 @@ from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
urlencode_postdata,
)
@@ -18,7 +19,6 @@ class HiDiveIE(InfoExtractor):
# so disabling geo bypass completely
_GEO_BYPASS = False
_NETRC_MACHINE = 'hidive'
_LOGGED_IN = False
_LOGIN_URL = 'https://www.hidive.com/account/login'
_TESTS = [{
@@ -38,10 +38,7 @@ class HiDiveIE(InfoExtractor):
}]
def _real_initialize(self):
if self._LOGGED_IN:
return
(email, password) = self._get_login_info()
email, password = self._get_login_info()
if email is None:
return
@@ -56,7 +53,6 @@ class HiDiveIE(InfoExtractor):
})
self._download_webpage(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
self._LOGGED_IN = True
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -85,8 +81,8 @@ class HiDiveIE(InfoExtractor):
bitrates = rendition.get('bitrates')
if not isinstance(bitrates, dict):
continue
m3u8_url = bitrates.get('hls')
if not isinstance(m3u8_url, compat_str):
m3u8_url = url_or_none(bitrates.get('hls'))
if not m3u8_url:
continue
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
@@ -98,9 +94,8 @@ class HiDiveIE(InfoExtractor):
if not isinstance(cc_file, list) or len(cc_file) < 3:
continue
cc_lang = cc_file[0]
cc_url = cc_file[2]
if not isinstance(cc_lang, compat_str) or not isinstance(
cc_url, compat_str):
cc_url = url_or_none(cc_file[2])
if not isinstance(cc_lang, compat_str) or not cc_url:
continue
subtitles.setdefault(cc_lang, []).append({
'url': cc_url,

View File

@@ -66,7 +66,7 @@ class HRTiBaseIE(InfoExtractor):
self._logout_url = modules['user']['resources']['logout']['uri']
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# TODO: figure out authentication with cookies
if username is None or password is None:
self.raise_login_required()

View File

@@ -3,12 +3,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
mimetype2ext,
parse_duration,
qualities,
url_or_none,
)
@@ -61,10 +61,11 @@ class ImdbIE(InfoExtractor):
for encoding in video_metadata.get('encodings', []):
if not encoding or not isinstance(encoding, dict):
continue
video_url = encoding.get('videoUrl')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(encoding.get('videoUrl'))
if not video_url:
continue
ext = determine_ext(video_url, mimetype2ext(encoding.get('mimeType')))
ext = mimetype2ext(encoding.get(
'mimeType')) or determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@@ -12,7 +12,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z]+)?$'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -43,6 +43,9 @@ class ImgurIE(InfoExtractor):
}, {
'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True,
}, {
'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True,
}]
def _real_extract(self, url):

View File

@@ -21,6 +21,21 @@ class IncIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# div with id=kaltura_player_1_kqs38cgm
'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html',
'info_dict': {
'id': '1_kqs38cgm',
'ext': 'mp4',
'title': 'Branson: "In the end, you have to say, Screw it. Just do it."',
'description': 'md5:21b832d034f9af5191ca5959da5e9cb6',
'timestamp': 1364403232,
'upload_date': '20130327',
'uploader_id': 'incdigital@inc.com',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html',
'only_matching': True,
@@ -31,10 +46,13 @@ class IncIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
partner_id = self._search_regex(
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage, 'partner id')
r'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d+)', webpage,
'partner id', default='1034971')
kaltura_id = self._parse_json(self._search_regex(
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
kaltura_id = self._search_regex(
r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id',
default=None, group='id') or self._parse_json(self._search_regex(
r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'),
display_id)['vid_kaltura_id']
return self.url_result(

View File

@@ -17,6 +17,7 @@ from ..utils import (
lowercase_escape,
std_headers,
try_get,
url_or_none,
)
@@ -170,7 +171,7 @@ class InstagramIE(InfoExtractor):
node = try_get(edge, lambda x: x['node'], dict)
if not node:
continue
node_video_url = try_get(node, lambda x: x['video_url'], compat_str)
node_video_url = url_or_none(node.get('video_url'))
if not node_video_url:
continue
entries.append({

View File

@@ -7,7 +7,7 @@ from ..utils import unified_timestamp
class InternazionaleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
_TESTS = [{
'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
'md5': '3e39d32b66882c1218e305acbf8348ca',
'info_dict': {
@@ -23,7 +23,23 @@ class InternazionaleIE(InfoExtractor):
'params': {
'format': 'bestvideo',
},
}
}, {
'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
'md5': '9db8663704cab73eb972d1cee0082c79',
'info_dict': {
'id': '761344',
'display_id': 'telefono-stare-con-noi-stessi',
'ext': 'mp4',
'title': 'Usiamo il telefono per evitare di stare con noi stessi',
'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
'timestamp': 1535528954,
'upload_date': '20180829',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'format': 'bestvideo',
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -40,8 +56,13 @@ class InternazionaleIE(InfoExtractor):
DATA_RE % 'job-id', webpage, 'video id', group='value')
video_path = self._search_regex(
DATA_RE % 'video-path', webpage, 'video path', group='value')
video_available_abroad = self._search_regex(
DATA_RE % 'video-available_abroad', webpage,
'video available aboard', default='1', group='value')
video_available_abroad = video_available_abroad == '1'
video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
video_base = 'https://video%s.internazionale.it/%s/%s.' % \
('' if video_available_abroad else '-ita', video_path, video_id)
formats = self._extract_m3u8_formats(
video_base + 'm3u8', display_id, 'mp4',

View File

@@ -239,7 +239,7 @@ class IqiyiIE(InfoExtractor):
return ohdave_rsa_encrypt(data, e, N)
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# No authentication to be performed
if not username:

View File

@@ -13,15 +13,17 @@ from ..compat import (
compat_etree_register_namespace,
)
from ..utils import (
determine_ext,
ExtractorError,
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
smuggle_url,
url_or_none,
xpath_with_ns,
xpath_element,
xpath_text,
int_or_none,
parse_duration,
smuggle_url,
ExtractorError,
determine_ext,
)
@@ -129,64 +131,65 @@ class ITVIE(InfoExtractor):
resp_env = self._download_xml(
params['data-playlist-url'], video_id,
headers=headers, data=etree.tostring(req_env))
playlist = xpath_element(resp_env, './/Playlist')
if playlist is None:
fault_code = xpath_text(resp_env, './/faultcode')
fault_string = xpath_text(resp_env, './/faultstring')
if fault_code == 'InvalidGeoRegion':
self.raise_geo_restricted(
msg=fault_string, countries=self._GEO_COUNTRIES)
elif fault_code not in (
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
info.update({
'title': self._og_search_title(webpage),
'episode_title': params.get('data-video-episode'),
'series': params.get('data-video-title'),
})
else:
title = xpath_text(playlist, 'EpisodeTitle', default=None)
info.update({
'title': title,
'episode_title': title,
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
'series': xpath_text(playlist, 'ProgrammeTitle'),
'duration': parse_duration(xpath_text(playlist, 'Duration')),
})
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
rtmp_url = media_files.attrib['base']
headers=headers, data=etree.tostring(req_env), fatal=False)
if resp_env:
playlist = xpath_element(resp_env, './/Playlist')
if playlist is None:
fault_code = xpath_text(resp_env, './/faultcode')
fault_string = xpath_text(resp_env, './/faultstring')
if fault_code == 'InvalidGeoRegion':
self.raise_geo_restricted(
msg=fault_string, countries=self._GEO_COUNTRIES)
elif fault_code not in (
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
info.update({
'title': self._og_search_title(webpage),
'episode_title': params.get('data-video-episode'),
'series': params.get('data-video-title'),
})
else:
title = xpath_text(playlist, 'EpisodeTitle', default=None)
info.update({
'title': title,
'episode_title': title,
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
'series': xpath_text(playlist, 'ProgrammeTitle'),
'duration': parse_duration(xpath_text(playlist, 'Duration')),
})
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
rtmp_url = media_files.attrib['base']
for media_file in media_files.findall('MediaFile'):
play_path = xpath_text(media_file, 'URL')
if not play_path:
continue
tbr = int_or_none(media_file.get('bitrate'), 1000)
f = {
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
'play_path': play_path,
# Providing this swfVfy allows to avoid truncated downloads
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
'page_url': url,
'tbr': tbr,
'ext': 'flv',
}
app = self._search_regex(
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
if app:
f.update({
'url': rtmp_url.split('?', 1)[0],
'app': app,
})
else:
f['url'] = rtmp_url
formats.append(f)
for media_file in media_files.findall('MediaFile'):
play_path = xpath_text(media_file, 'URL')
if not play_path:
continue
tbr = int_or_none(media_file.get('bitrate'), 1000)
f = {
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
'play_path': play_path,
# Providing this swfVfy allows to avoid truncated downloads
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
'page_url': url,
'tbr': tbr,
'ext': 'flv',
}
app = self._search_regex(
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
if app:
f.update({
'url': rtmp_url.split('?', 1)[0],
'app': app,
})
else:
f['url'] = rtmp_url
formats.append(f)
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
if caption_url.text:
extract_subtitle(caption_url.text)
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
if caption_url.text:
extract_subtitle(caption_url.text)
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
hmac = params.get('data-video-hmac')
@@ -248,8 +251,8 @@ class ITVIE(InfoExtractor):
for sub in subs:
if not isinstance(sub, dict):
continue
href = sub.get('Href')
if isinstance(href, compat_str):
href = url_or_none(sub.get('Href'))
if href:
extract_subtitle(href)
if not info.get('duration'):
info['duration'] = parse_duration(video_data.get('Duration'))
@@ -261,7 +264,17 @@ class ITVIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
})
return info
webpage_info = self._search_json_ld(webpage, video_id, default={})
if not webpage_info.get('title'):
webpage_info['title'] = self._html_search_regex(
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title',
default=None) or webpage_info['episode']
return merge_dicts(info, webpage_info)
class ITVBTCCIE(InfoExtractor):

View File

@@ -7,6 +7,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
remove_end,
url_or_none,
)
@@ -73,11 +74,14 @@ class IwaraIE(InfoExtractor):
formats = []
for a_format in video_data:
format_uri = url_or_none(a_format.get('uri'))
if not format_uri:
continue
format_id = a_format.get('resolution')
height = int_or_none(self._search_regex(
r'(\d+)p', format_id, 'height', default=None))
formats.append({
'url': a_format['uri'],
'url': self._proto_relative_url(format_uri, 'https:'),
'format_id': format_id,
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
'height': height,

View File

@@ -18,7 +18,7 @@ class JojIE(InfoExtractor):
joj:|
https?://media\.joj\.sk/embed/
)
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
(?P<id>[^/?#^]+)
'''
_TESTS = [{
'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
@@ -29,16 +29,24 @@ class JojIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3118,
}
}, {
'url': 'https://media.joj.sk/embed/9i1cxv',
'only_matching': True,
}, {
'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
'only_matching': True,
}, {
'url': 'joj:9i1cxv',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//media\.joj\.sk/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
webpage)
return [
mobj.group('url')
for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)

View File

@@ -4,16 +4,14 @@ import re
from .common import InfoExtractor
from ..aes import aes_decrypt_text
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
str_to_int,
strip_or_none,
url_or_none,
)
@@ -55,7 +53,8 @@ class KeezMoviesIE(InfoExtractor):
encrypted = False
def extract_format(format_url, height=None):
if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//')):
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//')):
return
if format_url in format_urls:
return

View File

@@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
dict_get,
int_or_none,
)
class KinoPoiskIE(InfoExtractor):
_GEO_COUNTRIES = ['RU']
_VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.kinopoisk.ru/film/81041/watch/',
'md5': '4f71c80baea10dfa54a837a46111d326',
'info_dict': {
'id': '81041',
'ext': 'mp4',
'title': 'Алеша попович и тугарин змей',
'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
'thumbnail': r're:^https?://.*',
'duration': 4533,
'age_limit': 12,
},
'params': {
'format': 'bestvideo',
},
}, {
'url': 'https://www.kinopoisk.ru/film/81041',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
query={'kpId': video_id})
data = self._parse_json(
self._search_regex(
r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
webpage, 'data'),
video_id)['models']
film = data['filmStatus']
title = film.get('title') or film['originalTitle']
formats = self._extract_m3u8_formats(
data['playlistEntity']['uri'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
self._sort_formats(formats)
description = dict_get(
film, ('descriptscription', 'description',
'shortDescriptscription', 'shortDescription'))
thumbnail = film.get('coverUrl') or film.get('posterUrl')
duration = int_or_none(film.get('duration'))
age_limit = int_or_none(film.get('restrictionAge'))
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'age_limit': age_limit,
'formats': formats,
}

View File

@@ -2,11 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
url_or_none,
)
@@ -109,7 +109,8 @@ class KonserthusetPlayIE(InfoExtractor):
captions = source.get('captionsAvailableLanguages')
if isinstance(captions, dict):
for lang, subtitle_url in captions.items():
if lang != 'none' and isinstance(subtitle_url, compat_str):
subtitle_url = url_or_none(subtitle_url)
if lang != 'none' and subtitle_url:
subtitles.setdefault(lang, []).append({'url': subtitle_url})
return {

View File

@@ -20,5 +20,7 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
wat_id = self._search_regex(r'data-watid=[\'"](\d+)', webpage, 'wat id')
wat_id = self._search_regex(
(r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'),
webpage, 'wat id')
return self.url_result('wat:' + wat_id, 'Wat', wat_id)

View File

@@ -4,7 +4,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urlparse,
)
@@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor):
form_data = self._hidden_inputs(form_html)
form_data.update(extra_form_data)
try:
response = self._download_json(
action_url, None, note,
data=urlencode_postdata(form_data),
headers={
'Referer': referrer_url,
'X-Requested-With': 'XMLHttpRequest',
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
response = self._parse_json(e.cause.read().decode('utf-8'), None)
self._check_error(response, ('email', 'password'))
raise
response = self._download_json(
action_url, None, note,
data=urlencode_postdata(form_data),
headers={
'Referer': referrer_url,
'X-Requested-With': 'XMLHttpRequest',
}, expected_status=(418, 500, ))
self._check_error(response, 'ErrorMessage')
self._check_error(response, ('email', 'password', 'ErrorMessage'))
return response, action_url

View File

@@ -0,0 +1,125 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
orderedSet,
parse_duration,
try_get,
)
class MarkizaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
_TESTS = [{
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
'md5': 'ada4e9fad038abeed971843aa028c7b0',
'info_dict': {
'id': '139078',
'ext': 'mp4',
'title': 'Oteckovia 109',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2760,
},
}, {
'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
'info_dict': {
'id': '85430',
'title': 'Televízne noviny',
},
'playlist_count': 23,
}, {
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
'only_matching': True,
}, {
'url': 'http://videoarchiv.markiza.sk/video/84723',
'only_matching': True,
}, {
'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
'only_matching': True,
}, {
'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
'only_matching': True,
}, {
'url': 'http://videoarchiv.markiza.sk/embed/85295',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
video_id, query={'id': video_id})
info = self._parse_jwplayer_data(data, m3u8_id='hls', mpd_id='dash')
if info.get('_type') == 'playlist':
info.update({
'id': video_id,
'title': try_get(
data, lambda x: x['details']['name'], compat_str),
})
else:
info['duration'] = parse_duration(
try_get(data, lambda x: x['details']['duration'], compat_str))
return info
class MarkizaPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
_TESTS = [{
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
'md5': 'ada4e9fad038abeed971843aa028c7b0',
'info_dict': {
'id': '139355',
'ext': 'mp4',
'title': 'Oteckovia 110',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2604,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
'only_matching': True,
}, {
'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
'only_matching': True,
}, {
'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
'only_matching': True,
}, {
'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
'only_matching': True,
}, {
'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url)
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(
# Downloading for some hosts (e.g. dajto, doma) fails with 500
# although everything seems to be OK, so considering 500
# status code to be expected.
url, playlist_id, expected_status=500)
entries = [
self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
for video_id in orderedSet(re.findall(
r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
webpage))]
return self.playlist_result(entries, playlist_id)

View File

@@ -3,59 +3,75 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from .theplatform import ThePlatformBaseIE
from ..utils import (
determine_ext,
parse_duration,
try_get,
unified_strdate,
ExtractorError,
int_or_none,
update_url_query,
)
class MediasetIE(InfoExtractor):
class MediasetIE(ThePlatformBaseIE):
_TP_TLD = 'eu'
_VALID_URL = r'''(?x)
(?:
mediaset:|
https?://
(?:www\.)?video\.mediaset\.it/
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?:
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=
player/index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9]+)
)(?P<id>[0-9A-Z]{16})
'''
_TESTS = [{
# full episode
'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html',
'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
'info_dict': {
'id': '661824',
'id': 'FAFU000000661824',
'ext': 'mp4',
'title': 'Quarta puntata',
'description': 'md5:7183696d6df570e3412a5ef74b27c5e2',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1414,
'creator': 'mediaset',
'duration': 1414.26,
'upload_date': '20161107',
'series': 'Hello Goodbye',
'categories': ['reality'],
'timestamp': 1478532900,
'uploader': 'Rete 4',
'uploader_id': 'R4',
},
'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
'md5': '288532f0ad18307705b01e581304cd7b',
'info_dict': {
'id': 'F309013801000501',
'ext': 'mp4',
'title': 'Puntata del 25 maggio',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 6565.007,
'upload_date': '20180526',
'series': 'Matrix',
'timestamp': 1527326245,
'uploader': 'Canale 5',
'uploader_id': 'C5',
},
'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
# clip
'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
'only_matching': True,
}, {
# iframe simple
'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true',
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
'only_matching': True,
}, {
# iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true',
'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
'only_matching': True,
}, {
'url': 'mediaset:661824',
'url': 'mediaset:FAFU000000665924',
'only_matching': True,
}]
@@ -68,51 +84,54 @@ class MediasetIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
video_list = self._download_json(
'http://cdnsel01.mediaset.net/GetCdn.aspx',
video_id, 'Downloading video CDN JSON', query={
'streamid': video_id,
'format': 'json',
})['videoList']
guid = self._match_id(url)
tp_path = 'PR1GhC/media/guid/2702976343/' + guid
info = self._extract_theplatform_metadata(tp_path, guid)
formats = []
for format_url in video_list:
if '.ism' in format_url:
formats.extend(self._extract_ism_formats(
format_url, video_id, ism_id='mss', fatal=False))
else:
formats.append({
'url': format_url,
'format_id': determine_ext(format_url),
})
subtitles = {}
first_e = None
for asset_type in ('SD', 'HD'):
for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
try:
tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
'mbr': 'true',
'formats': f,
'assetTypes': asset_type,
}), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
except ExtractorError as e:
if not first_e:
first_e = e
break
for tp_f in tp_formats:
tp_f['quality'] = 1 if asset_type == 'HD' else 0
formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if first_e and not formats:
raise first_e
self._sort_formats(formats)
mediainfo = self._download_json(
'http://plr.video.mediaset.it/html/metainfo.sjson',
video_id, 'Downloading video info JSON', query={
'id': video_id,
})['video']
fields = []
for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
fields.extend(templ % repl for repl in repls)
feed_data = self._download_json(
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
guid, fatal=False, query={'fields': ','.join(fields)})
if feed_data:
publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
info.update({
'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
'series': feed_data.get('mediasetprogram$brandTitle'),
'uploader': publish_info.get('description'),
'uploader_id': publish_info.get('channel'),
'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
})
title = mediainfo['title']
creator = try_get(
mediainfo, lambda x: x['brand-info']['publisher'], compat_str)
category = try_get(
mediainfo, lambda x: x['brand-info']['category'], compat_str)
categories = [category] if category else None
return {
'id': video_id,
'title': title,
'description': mediainfo.get('short-description'),
'thumbnail': mediainfo.get('thumbnail'),
'duration': parse_duration(mediainfo.get('duration')),
'creator': creator,
'upload_date': unified_strdate(mediainfo.get('production-date')),
'webpage_url': mediainfo.get('url'),
'series': mediainfo.get('brand-value'),
'categories': categories,
info.update({
'id': guid,
'formats': formats,
}
'subtitles': subtitles,
})
return info

View File

@@ -15,6 +15,7 @@ from ..utils import (
mimetype2ext,
unescapeHTML,
unsmuggle_url,
url_or_none,
urljoin,
)
@@ -156,8 +157,8 @@ class MediasiteIE(InfoExtractor):
stream_formats = []
for unum, VideoUrl in enumerate(video_urls):
video_url = VideoUrl.get('Location')
if not video_url or not isinstance(video_url, compat_str):
video_url = url_or_none(VideoUrl.get('Location'))
if not video_url:
continue
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS

View File

@@ -1,84 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import uuid
from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import (
int_or_none,
extract_attributes,
determine_ext,
smuggle_url,
parse_duration,
)
class MiTeleBaseIE(InfoExtractor):
def _get_player_info(self, url, webpage):
player_data = extract_attributes(self._search_regex(
r'(?s)(<ms-video-player.+?</ms-video-player>)',
webpage, 'ms video player'))
video_id = player_data['data-media-id']
if player_data.get('data-cms-id') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, ie=OoyalaIE.ie_key(), video_id=video_id)
config_url = compat_urlparse.urljoin(url, player_data['data-config'])
config = self._download_json(
config_url, video_id, 'Downloading config JSON')
mmc_url = config['services']['mmc']
duration = None
formats = []
for m_url in (mmc_url, mmc_url.replace('/flash.json', '/html5.json')):
mmc = self._download_json(
m_url, video_id, 'Downloading mmc JSON')
if not duration:
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
})
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'thumbnail': player_data.get('data-poster') or config.get('poster', {}).get('imageUrl'),
'duration': duration,
}
class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@@ -86,7 +16,7 @@ class MiTeleIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
'info_dict': {
'id': '57b0dfb9c715da65618b4afa',
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
'ext': 'mp4',
'title': 'Tor, la web invisible',
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
@@ -104,7 +34,7 @@ class MiTeleIE(InfoExtractor):
# no explicit title
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
'info_dict': {
'id': '57b0de3dc915da14058b4876',
'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
'ext': 'mp4',
'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
@@ -128,40 +58,21 @@ class MiTeleIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
gigya_url = self._search_regex(
r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key
appKey = self._search_regex(
r'constant\s*\(\s*["\']_appGridApplicationKey["\']\s*,\s*["\']([0-9a-f]+)',
gigya_sc, 'appKey')
session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session',
video_id, 'Downloading session keys', query={
'appKey': appKey,
'uuid': compat_str(uuid.uuid4()),
})
paths = self._download_json(
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
video_id, 'Downloading paths JSON',
query={'sessionKey': compat_str(session_json['sessionKey'])})
'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
video_id, 'Downloading paths JSON')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
source = self._download_json(
'http://%s%s%s/docs/%s' % (
ooyala_s['base_url'], ooyala_s['full_path'],
ooyala_s['provider_id'], video_id),
'%s://%s%s%s/docs/%s' % (
ooyala_s.get('protocol', 'https'), base_url, full_path,
ooyala_s.get('provider_id', '104951'), video_id),
video_id, 'Downloading data JSON', query={
'include_titles': 'Series,Season',
'product_name': 'test',
'product_name': ooyala_s.get('product_name', 'test'),
'format': 'full',
})['hits']['hits'][0]['_source']

View File

@@ -1,96 +1,90 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
parse_iso8601,
)
from .nhl import NHLBaseIE
class MLBIE(InfoExtractor):
class MLBIE(NHLBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:[\da-z_-]+\.)*mlb\.com/
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
(?:
(?:
(?:.*?/)?video/(?:topic/[\da-z_-]+/)?(?:v|.*?/c-)|
(?:[^/]+/)*c-|
(?:
shared/video/embed/(?:embed|m-internal-embed)\.html|
(?:[^/]+/)+(?:play|index)\.jsp|
)\?.*?\bcontent_id=
)
(?P<id>n?\d+)|
(?:[^/]+/)*(?P<path>[^/]+)
(?P<id>\d+)
)
'''
_CONTENT_DOMAIN = 'content.mlb.com'
_TESTS = [
{
'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
'md5': 'ff56a598c2cf411a9a38a69709e97079',
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
'md5': '632358dacfceec06bad823b83d21df2d',
'info_dict': {
'id': '34698933',
'ext': 'mp4',
'title': "Ackley's spectacular catch",
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
'duration': 66,
'timestamp': 1405980600,
'upload_date': '20140721',
'timestamp': 1405995000,
'upload_date': '20140722',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f',
'info_dict': {
'id': '34496663',
'ext': 'mp4',
'title': 'Stanton prepares for Derby',
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
'duration': 46,
'timestamp': 1405105800,
'timestamp': 1405120200,
'upload_date': '20140711',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
'md5': '0e6e73d509321e142409b695eadd541f',
'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
'md5': '99bb9176531adc600b90880fb8be9328',
'info_dict': {
'id': '34578115',
'ext': 'mp4',
'title': 'Cespedes repeats as Derby champ',
'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
'duration': 488,
'timestamp': 1405399936,
'timestamp': 1405414336,
'upload_date': '20140715',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
'md5': 'b8fd237347b844365d74ea61d4245967',
'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
'md5': 'da8b57a12b060e7663ee1eebd6f330ec',
'info_dict': {
'id': '34577915',
'ext': 'mp4',
'title': 'Bautista on Home Run Derby',
'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
'duration': 52,
'timestamp': 1405390722,
'timestamp': 1405405122,
'upload_date': '20140715',
'thumbnail': r're:^https?://.*\.jpg$',
},
},
{
'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
'md5': 'aafaf5b0186fee8f32f20508092f8111',
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
'md5': 'e09e37b552351fddbf4d9e699c924d68',
'info_dict': {
'id': '75609783',
'ext': 'mp4',
'title': 'Must C: Pillar climbs for catch',
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
'timestamp': 1429124820,
'timestamp': 1429139220,
'upload_date': '20150415',
}
},
@@ -111,7 +105,7 @@ class MLBIE(InfoExtractor):
'only_matching': True,
},
{
'url': 'http://m.cardinals.mlb.com/stl/video/v51175783/atlstl-piscotty-makes-great-sliding-catch-on-line/?partnerId=as_mlb_20150321_42500876&adbid=579409712979910656&adbpl=tw&adbpr=52847728',
'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
'only_matching': True,
},
{
@@ -120,58 +114,7 @@ class MLBIE(InfoExtractor):
'only_matching': True,
},
{
'url': 'http://washington.nationals.mlb.com/mlb/gameday/index.jsp?c_id=was&gid=2015_05_09_atlmlb_wasmlb_1&lang=en&content_id=108309983&mode=video#',
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
'only_matching': True,
}
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
if not video_id:
video_path = mobj.group('path')
webpage = self._download_webpage(url, video_path)
video_id = self._search_regex(
[r'data-video-?id="(\d+)"', r'content_id=(\d+)'], webpage, 'video id')
detail = self._download_xml(
'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
% (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
title = detail.find('./headline').text
description = detail.find('./big-blurb').text
duration = parse_duration(detail.find('./duration').text)
timestamp = parse_iso8601(detail.attrib['date'][:-5])
thumbnails = [{
'url': thumbnail.text,
} for thumbnail in detail.findall('./thumbnailScenarios/thumbnailScenario')]
formats = []
for media_url in detail.findall('./url'):
playback_scenario = media_url.attrib['playback_scenario']
fmt = {
'url': media_url.text,
'format_id': playback_scenario,
}
m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
if m:
fmt.update({
'vbr': int(m.group('vbr')) * 1000,
'width': int(m.group('width')),
'height': int(m.group('height')),
})
formats.append(fmt)
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'timestamp': timestamp,
'formats': formats,
'thumbnails': thumbnails,
}

View File

@@ -77,8 +77,11 @@ class MotherlessIE(InfoExtractor):
title = self._html_search_regex(
r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
video_url = self._html_search_regex(
r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video URL')
video_url = (self._html_search_regex(
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
webpage, 'video URL', default=None, group='url') or
'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
age_limit = self._rta_search(webpage)
view_count = str_to_int(self._html_search_regex(
r'<strong>Views</strong>\s+([^<]+)<',
@@ -120,7 +123,7 @@ class MotherlessIE(InfoExtractor):
class MotherlessGroupIE(InfoExtractor):
_VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
_VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
_TESTS = [{
'url': 'http://motherless.com/g/movie_scenes',
'info_dict': {

View File

@@ -29,14 +29,13 @@ class NexxIE(InfoExtractor):
_TESTS = [{
# movie
'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
'md5': '828cea195be04e66057b846288295ba1',
'md5': '31899fd683de49ad46f4ee67e53e83fe',
'info_dict': {
'id': '128907',
'ext': 'mp4',
'title': 'Stiftung Warentest',
'alt_title': 'Wie ein Test abläuft',
'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
'release_year': 2013,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2509,
@@ -62,6 +61,7 @@ class NexxIE(InfoExtractor):
'params': {
'skip_download': True,
},
'skip': 'HTTP Error 404: Not Found',
}, {
# does not work via arc
'url': 'nexx:741:1269984',
@@ -71,12 +71,26 @@ class NexxIE(InfoExtractor):
'ext': 'mp4',
'title': '1 TAG ohne KLO... wortwörtlich! 😑',
'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
'description': 'md5:4604539793c49eda9443ab5c5b1d612f',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 607,
'timestamp': 1518614955,
'upload_date': '20180214',
},
}, {
# free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
'url': 'nexx:747:1533779',
'md5': '6bf6883912b82b7069fb86c2297e9893',
'info_dict': {
'id': '1533779',
'ext': 'mp4',
'title': 'Aufregung um ausgebrochene Raubtiere',
'alt_title': 'Eifel-Zoo',
'description': 'md5:f21375c91c74ad741dcb164c427999d2',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 111,
'timestamp': 1527874460,
'upload_date': '20180601',
},
}, {
'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
'only_matching': True,
@@ -141,6 +155,139 @@ class NexxIE(InfoExtractor):
self._handle_error(result)
return result['result']
def _extract_free_formats(self, video, video_id):
stream_data = video['streamdata']
cdn = stream_data['cdnType']
assert cdn == 'free'
hash = video['general']['hash']
ps = compat_str(stream_data['originalDomain'])
if stream_data['applyFolderHierarchy'] == 1:
s = ('%04d' % int(video_id))[::-1]
ps += '/%s/%s' % (s[0:2], s[2:4])
ps += '/%s/%s_' % (video_id, hash)
t = 'http://%s' + ps
fd = stream_data['azureFileDistribution'].split(',')
cdn_provider = stream_data['cdnProvider']
def p0(p):
return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
formats = []
if cdn_provider == 'ak':
t += ','
for i in fd:
p = i.split(':')
t += p[1] + p0(int(p[0])) + ','
t += '.mp4.csmil/master.%s'
elif cdn_provider == 'ce':
k = t.split('/')
h = k.pop()
http_base = t = '/'.join(k)
http_base = http_base % stream_data['cdnPathHTTP']
t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
for i in fd:
p = i.split(':')
tbr = int(p[0])
filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
f = {
'url': http_base + '/' + filename,
'format_id': '%s-http-%d' % (cdn, tbr),
'tbr': tbr,
}
width_height = p[1].split('x')
if len(width_height) == 2:
f.update({
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
})
formats.append(f)
a = filename + ':%s' % (tbr * 1000)
t += a + ','
t = t[:-1] + '&audiostream=' + a.split(':')[0]
else:
assert False
if cdn_provider == 'ce':
formats.extend(self._extract_mpd_formats(
t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
mpd_id='%s-dash' % cdn, fatal=False))
formats.extend(self._extract_m3u8_formats(
t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
return formats
def _extract_azure_formats(self, video, video_id):
stream_data = video['streamdata']
cdn = stream_data['cdnType']
assert cdn == 'azure'
azure_locator = stream_data['azureLocator']
def get_cdn_shield_base(shield_type='', static=False):
for secure in ('', 's'):
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
if cdn_shield:
return 'http%s://%s' % (secure, cdn_shield)
else:
if 'fb' in stream_data['azureAccount']:
prefix = 'df' if static else 'f'
else:
prefix = 'd' if static else 'p'
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
language = video['general'].get('language_raw') or ''
azure_stream_base = get_cdn_shield_base()
is_ml = ',' in language
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
protection_token = try_get(
video, lambda x: x['protectiondata']['token'], compat_str)
if protection_token:
azure_manifest_url += '?hdnts=%s' % protection_token
formats = self._extract_m3u8_formats(
azure_manifest_url % '(format=m3u8-aapl)',
video_id, 'mp4', 'm3u8_native',
m3u8_id='%s-hls' % cdn, fatal=False)
formats.extend(self._extract_mpd_formats(
azure_manifest_url % '(format=mpd-time-csf)',
video_id, mpd_id='%s-dash' % cdn, fatal=False))
formats.extend(self._extract_ism_formats(
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
azure_progressive_base = get_cdn_shield_base('Prog', True)
azure_file_distribution = stream_data.get('azureFileDistribution')
if azure_file_distribution:
fds = azure_file_distribution.split(',')
if fds:
for fd in fds:
ss = fd.split(':')
if len(ss) == 2:
tbr = int_or_none(ss[0])
if tbr:
f = {
'url': '%s%s/%s_src_%s_%d.mp4' % (
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
'format_id': '%s-http-%d' % (cdn, tbr),
'tbr': tbr,
}
width_height = ss[1].split('x')
if len(width_height) == 2:
f.update({
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
})
formats.append(f)
return formats
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
@@ -220,72 +367,15 @@ class NexxIE(InfoExtractor):
general = video['general']
title = general['title']
stream_data = video['streamdata']
language = general.get('language_raw') or ''
cdn = video['streamdata']['cdnType']
# TODO: reverse more cdns
cdn = stream_data['cdnType']
assert cdn == 'azure'
azure_locator = stream_data['azureLocator']
def get_cdn_shield_base(shield_type='', static=False):
for secure in ('', 's'):
cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
if cdn_shield:
return 'http%s://%s' % (secure, cdn_shield)
else:
if 'fb' in stream_data['azureAccount']:
prefix = 'df' if static else 'f'
else:
prefix = 'd' if static else 'p'
account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
azure_stream_base = get_cdn_shield_base()
is_ml = ',' in language
azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
protection_token = try_get(
video, lambda x: x['protectiondata']['token'], compat_str)
if protection_token:
azure_manifest_url += '?hdnts=%s' % protection_token
formats = self._extract_m3u8_formats(
azure_manifest_url % '(format=m3u8-aapl)',
video_id, 'mp4', 'm3u8_native',
m3u8_id='%s-hls' % cdn, fatal=False)
formats.extend(self._extract_mpd_formats(
azure_manifest_url % '(format=mpd-time-csf)',
video_id, mpd_id='%s-dash' % cdn, fatal=False))
formats.extend(self._extract_ism_formats(
azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
azure_progressive_base = get_cdn_shield_base('Prog', True)
azure_file_distribution = stream_data.get('azureFileDistribution')
if azure_file_distribution:
fds = azure_file_distribution.split(',')
if fds:
for fd in fds:
ss = fd.split(':')
if len(ss) == 2:
tbr = int_or_none(ss[0])
if tbr:
f = {
'url': '%s%s/%s_src_%s_%d.mp4' % (
azure_progressive_base, azure_locator, video_id, ss[1], tbr),
'format_id': '%s-http-%d' % (cdn, tbr),
'tbr': tbr,
}
width_height = ss[1].split('x')
if len(width_height) == 2:
f.update({
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
})
formats.append(f)
if cdn == 'azure':
formats = self._extract_azure_formats(video, video_id)
elif cdn == 'free':
formats = self._extract_free_formats(video, video_id)
else:
# TODO: reverse more cdns
assert False
self._sort_formats(formats)

View File

@@ -1,18 +1,10 @@
from __future__ import unicode_literals
import re
import json
import os
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_str,
)
from ..compat import compat_str
from ..utils import (
unified_strdate,
determine_ext,
int_or_none,
parse_iso8601,
@@ -20,236 +12,77 @@ from ..utils import (
)
class NHLBaseInfoExtractor(InfoExtractor):
@staticmethod
def _fix_json(json_string):
return json_string.replace('\\\'', '\'')
class NHLBaseIE(InfoExtractor):
def _real_extract(self, url):
site, tmp_id = re.match(self._VALID_URL, url).groups()
video_data = self._download_json(
'https://%s/%s/%sid/v1/%s/details/web-v1.json'
% (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id)
if video_data.get('type') != 'video':
video_data = video_data['media']
video = video_data.get('video')
if video:
video_data = video
else:
videos = video_data.get('videos')
if videos:
video_data = videos[0]
def _real_extract_video(self, video_id):
vid_parts = video_id.split(',')
if len(vid_parts) == 3:
video_id = '%s0%s%s-X-h' % (vid_parts[0][:4], vid_parts[1], vid_parts[2].rjust(4, '0'))
json_url = 'http://video.nhl.com/videocenter/servlets/playlist?ids=%s&format=json' % video_id
data = self._download_json(
json_url, video_id, transform_source=self._fix_json)
return self._extract_video(data[0])
video_id = compat_str(video_data['id'])
title = video_data['title']
def _extract_video(self, info):
video_id = info['id']
self.report_extraction(video_id)
formats = []
for playback in video_data.get('playbacks', []):
playback_url = playback.get('url')
if not playback_url:
continue
ext = determine_ext(playback_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=playback.get('name', 'hls'), fatal=False)
self._check_formats(m3u8_formats, video_id)
formats.extend(m3u8_formats)
else:
height = int_or_none(playback.get('height'))
formats.append({
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
'url': playback_url,
'width': int_or_none(playback.get('width')),
'height': height,
'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
})
self._sort_formats(formats)
initial_video_url = info['publishPoint']
if info['formats'] == '1':
parsed_url = compat_urllib_parse_urlparse(initial_video_url)
filename, ext = os.path.splitext(parsed_url.path)
path = '%s_sd%s' % (filename, ext)
data = compat_urllib_parse_urlencode({
'type': 'fvod',
'path': compat_urlparse.urlunparse(parsed_url[:2] + (path,) + parsed_url[3:])
thumbnails = []
cuts = video_data.get('image', {}).get('cuts') or []
if isinstance(cuts, dict):
cuts = cuts.values()
for thumbnail_data in cuts:
thumbnail_url = thumbnail_data.get('src')
if not thumbnail_url:
continue
thumbnails.append({
'url': thumbnail_url,
'width': int_or_none(thumbnail_data.get('width')),
'height': int_or_none(thumbnail_data.get('height')),
})
path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
path_doc = self._download_xml(
path_url, video_id, 'Downloading final video url')
video_url = path_doc.find('path').text
else:
video_url = initial_video_url
join = compat_urlparse.urljoin
ret = {
'id': video_id,
'title': info['name'],
'url': video_url,
'description': info['description'],
'duration': int(info['duration']),
'thumbnail': join(join(video_url, '/u/'), info['bigImage']),
'upload_date': unified_strdate(info['releaseDate'].split('.')[0]),
}
if video_url.startswith('rtmp:'):
mobj = re.match(r'(?P<tc_url>rtmp://[^/]+/(?P<app>[a-z0-9/]+))/(?P<play_path>mp4:.*)', video_url)
ret.update({
'tc_url': mobj.group('tc_url'),
'play_path': mobj.group('play_path'),
'app': mobj.group('app'),
'no_resume': True,
})
return ret
class NHLVideocenterIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:videocenter'
_VALID_URL = r'https?://video(?P<team>\.[^.]*)?\.nhl\.com/videocenter/(?:console|embed)?(?:\?(?:.*?[?&])?)(?:id|hlg|playlist)=(?P<id>[-0-9a-zA-Z,]+)'
_TESTS = [{
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=6?id=453614',
'md5': 'db704a4ea09e8d3988c85e36cc892d09',
'info_dict': {
'id': '453614',
'ext': 'mp4',
'title': 'Quick clip: Weise 4-3 goal vs Flames',
'description': 'Dale Weise scores his first of the season to put the Canucks up 4-3.',
'duration': 18,
'upload_date': '20131006',
},
}, {
'url': 'http://video.nhl.com/videocenter/console?id=2014020024-628-h',
'md5': 'd22e82bc592f52d37d24b03531ee9696',
'info_dict': {
'id': '2014020024-628-h',
'ext': 'mp4',
'title': 'Alex Galchenyuk Goal on Ray Emery (14:40/3rd)',
'description': 'Home broadcast - Montreal Canadiens at Philadelphia Flyers - October 11, 2014',
'duration': 0,
'upload_date': '20141011',
},
}, {
'url': 'http://video.mapleleafs.nhl.com/videocenter/console?id=58665&catid=802',
'md5': 'c78fc64ea01777e426cfc202b746c825',
'info_dict': {
'id': '58665',
'ext': 'flv',
'title': 'Classic Game In Six - April 22, 1979',
'description': 'It was the last playoff game for the Leafs in the decade, and the last time the Leafs and Habs played in the playoffs. Great game, not a great ending.',
'duration': 400,
'upload_date': '20100129'
},
}, {
'url': 'http://video.flames.nhl.com/videocenter/console?id=630616',
'only_matching': True,
}, {
'url': 'http://video.nhl.com/videocenter/?id=736722',
'only_matching': True,
}, {
'url': 'http://video.nhl.com/videocenter/console?hlg=20142015,2,299&lang=en',
'md5': '076fcb88c255154aacbf0a7accc3f340',
'info_dict': {
'id': '2014020299-X-h',
'ext': 'mp4',
'title': 'Penguins at Islanders / Game Highlights',
'description': 'Home broadcast - Pittsburgh Penguins at New York Islanders - November 22, 2014',
'duration': 268,
'upload_date': '20141122',
}
}, {
'url': 'http://video.oilers.nhl.com/videocenter/console?id=691469&catid=4',
'info_dict': {
'id': '691469',
'ext': 'mp4',
'title': 'RAW | Craig MacTavish Full Press Conference',
'description': 'Oilers GM Craig MacTavish addresses the media at Rexall Place on Friday.',
'upload_date': '20141205',
},
'params': {
'skip_download': True, # Requires rtmpdump
}
}, {
'url': 'http://video.nhl.com/videocenter/embed?playlist=836127',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
return self._real_extract_video(video_id)
class NHLNewsIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:news'
IE_DESC = 'NHL news'
_VALID_URL = r'https?://(?:.+?\.)?nhl\.com/(?:ice|club)/news\.html?(?:\?(?:.*?[?&])?)id=(?P<id>[-0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://www.nhl.com/ice/news.htm?id=750727',
'md5': '4b3d1262e177687a3009937bd9ec0be8',
'info_dict': {
'id': '736722',
'ext': 'mp4',
'title': 'Cal Clutterbuck has been fined $2,000',
'description': 'md5:45fe547d30edab88b23e0dd0ab1ed9e6',
'duration': 37,
'upload_date': '20150128',
},
}, {
# iframe embed
'url': 'http://sabres.nhl.com/club/news.htm?id=780189',
'md5': '9f663d1c006c90ac9fb82777d4294e12',
'info_dict': {
'id': '836127',
'ext': 'mp4',
'title': 'Morning Skate: OTT vs. BUF (9/23/15)',
'description': "Brian Duff chats with Tyler Ennis prior to Buffalo's first preseason home game.",
'duration': 93,
'upload_date': '20150923',
},
}]
def _real_extract(self, url):
news_id = self._match_id(url)
webpage = self._download_webpage(url, news_id)
video_id = self._search_regex(
[r'pVid(\d+)', r"nlid\s*:\s*'(\d+)'",
r'<iframe[^>]+src=["\']https?://video.*?\.nhl\.com/videocenter/embed\?.*\bplaylist=(\d+)'],
webpage, 'video id')
return self._real_extract_video(video_id)
class NHLVideocenterCategoryIE(NHLBaseInfoExtractor):
IE_NAME = 'nhl.com:videocenter:category'
IE_DESC = 'NHL videocenter category'
_VALID_URL = r'https?://video\.(?P<team>[^.]*)\.nhl\.com/videocenter/(console\?[^(id=)]*catid=(?P<catid>[0-9]+)(?![&?]id=).*?)?$'
_TEST = {
'url': 'http://video.canucks.nhl.com/videocenter/console?catid=999',
'info_dict': {
'id': '999',
'title': 'Highlights',
},
'playlist_count': 12,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
team = mobj.group('team')
webpage = self._download_webpage(url, team)
cat_id = self._search_regex(
[r'var defaultCatId = "(.+?)";',
r'{statusIndex:0,index:0,.*?id:(.*?),'],
webpage, 'category id')
playlist_title = self._html_search_regex(
r'tab0"[^>]*?>(.*?)</td>',
webpage, 'playlist title', flags=re.DOTALL).lower().capitalize()
data = compat_urllib_parse_urlencode({
'cid': cat_id,
# This is the default value
'count': 12,
'ptrs': 3,
'format': 'json',
})
path = '/videocenter/servlets/browse?' + data
request_url = compat_urlparse.urljoin(url, path)
response = self._download_webpage(request_url, playlist_title)
response = self._fix_json(response)
if not response.strip():
self._downloader.report_warning('Got an empty response, trying '
'adding the "newvideos" parameter')
response = self._download_webpage(request_url + '&newvideos=true',
playlist_title)
response = self._fix_json(response)
videos = json.loads(response)
return {
'_type': 'playlist',
'title': playlist_title,
'id': cat_id,
'entries': [self._extract_video(v) for v in videos],
'id': video_id,
'title': title,
'description': video_data.get('description'),
'timestamp': parse_iso8601(video_data.get('date')),
'duration': parse_duration(video_data.get('duration')),
'thumbnails': thumbnails,
'formats': formats,
}
class NHLIE(InfoExtractor):
class NHLIE(NHLBaseIE):
IE_NAME = 'nhl.com'
_VALID_URL = r'https?://(?:www\.)?(?P<site>nhl|wch2016)\.com/(?:[^/]+/)*c-(?P<id>\d+)'
_SITES_MAP = {
'nhl': 'nhl',
'wch2016': 'wch',
}
_CONTENT_DOMAIN = 'nhl.bamcontent.com'
_TESTS = [{
# type=video
'url': 'https://www.nhl.com/video/anisimov-cleans-up-mess/t-277752844/c-43663503',
@@ -293,59 +126,3 @@ class NHLIE(InfoExtractor):
'url': 'https://www.wch2016.com/news/3-stars-team-europe-vs-team-canada/c-282195068',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
tmp_id, site = mobj.group('id'), mobj.group('site')
video_data = self._download_json(
'https://nhl.bamcontent.com/%s/id/v1/%s/details/web-v1.json'
% (self._SITES_MAP[site], tmp_id), tmp_id)
if video_data.get('type') == 'article':
video_data = video_data['media']
video_id = compat_str(video_data['id'])
title = video_data['title']
formats = []
for playback in video_data.get('playbacks', []):
playback_url = playback.get('url')
if not playback_url:
continue
ext = determine_ext(playback_url)
if ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
playback_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=playback.get('name', 'hls'), fatal=False)
self._check_formats(m3u8_formats, video_id)
formats.extend(m3u8_formats)
else:
height = int_or_none(playback.get('height'))
formats.append({
'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')),
'url': playback_url,
'width': int_or_none(playback.get('width')),
'height': height,
})
self._sort_formats(formats, ('preference', 'width', 'height', 'tbr', 'format_id'))
thumbnails = []
for thumbnail_id, thumbnail_data in video_data.get('image', {}).get('cuts', {}).items():
thumbnail_url = thumbnail_data.get('src')
if not thumbnail_url:
continue
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
'width': int_or_none(thumbnail_data.get('width')),
'height': int_or_none(thumbnail_data.get('height')),
})
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'timestamp': parse_iso8601(video_data.get('date')),
'duration': parse_duration(video_data.get('duration')),
'thumbnails': thumbnails,
'formats': formats,
}

View File

@@ -163,7 +163,7 @@ class NiconicoIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
# No authentication to be performed
if not username:
return True
@@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor):
},
'timing_constraint': 'unlimited'
}
}))
}).encode())
resolution = video_quality.get('resolution', {})

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
parse_iso8601,
float_or_none,
@@ -13,38 +12,11 @@ from ..utils import (
)
class NineCNineMediaBaseIE(InfoExtractor):
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
class NineCNineMediaStackIE(NineCNineMediaBaseIE):
IE_NAME = '9c9media:stack'
_GEO_COUNTRIES = ['CA']
_VALID_URL = r'9c9media:stack:(?P<destination_code>[^:]+):(?P<content_id>\d+):(?P<content_package>\d+):(?P<id>\d+)'
def _real_extract(self, url):
destination_code, content_id, package_id, stack_id = re.match(self._VALID_URL, url).groups()
stack_base_url_template = self._API_BASE_TEMPLATE + 'contentpackages/%s/stacks/%s/manifest.'
stack_base_url = stack_base_url_template % (destination_code, content_id, package_id, stack_id)
formats = []
formats.extend(self._extract_m3u8_formats(
stack_base_url + 'm3u8', stack_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
stack_base_url + 'f4m', stack_id,
f4m_id='hds', fatal=False))
self._sort_formats(formats)
return {
'id': stack_id,
'formats': formats,
}
class NineCNineMediaIE(NineCNineMediaBaseIE):
class NineCNineMediaIE(InfoExtractor):
IE_NAME = '9c9media'
_GEO_COUNTRIES = ['CA']
_VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
_API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'
def _real_extract(self, url):
destination_code, content_id = re.match(self._VALID_URL, url).groups()
@@ -58,13 +30,26 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
content_package = content['ContentPackages'][0]
package_id = content_package['Id']
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
content_package = self._download_json(content_package_url, content_id)
content_package = self._download_json(
content_package_url, content_id, query={
'$include': '[HasClosedCaptions]',
})
if content_package.get('Constraints', {}).get('Security', {}).get('Type') == 'adobe-drm':
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
raise ExtractorError('This video is DRM protected.', expected=True)
stacks = self._download_json(content_package_url + 'stacks/', package_id)['Items']
multistacks = len(stacks) > 1
manifest_base_url = content_package_url + 'manifest.'
formats = []
formats.extend(self._extract_m3u8_formats(
manifest_base_url + 'm3u8', content_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
manifest_base_url + 'f4m', content_id,
f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', content_id,
mpd_id='dash', fatal=False))
self._sort_formats(formats)
thumbnails = []
for image in content.get('Images', []):
@@ -85,10 +70,12 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
continue
container.append(e_name)
description = content.get('Desc') or content.get('ShortDesc')
season = content.get('Season', {})
base_info = {
'description': description,
info = {
'id': content_id,
'title': title,
'description': content.get('Desc') or content.get('ShortDesc'),
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
'episode_number': int_or_none(content.get('Episode')),
'season': season.get('Name'),
@@ -97,26 +84,19 @@ class NineCNineMediaIE(NineCNineMediaBaseIE):
'series': content.get('Media', {}).get('Name'),
'tags': tags,
'categories': categories,
'duration': float_or_none(content_package.get('Duration')),
'formats': formats,
}
entries = []
for stack in stacks:
stack_id = compat_str(stack['Id'])
entry = {
'_type': 'url_transparent',
'url': '9c9media:stack:%s:%s:%s:%s' % (destination_code, content_id, package_id, stack_id),
'id': stack_id,
'title': '%s_part%s' % (title, stack['Name']) if multistacks else title,
'duration': float_or_none(stack.get('Duration')),
'ie_key': 'NineCNineMediaStack',
if content_package.get('HasClosedCaptions'):
info['subtitles'] = {
'en': [{
'url': manifest_base_url + 'vtt',
'ext': 'vtt',
}, {
'url': manifest_base_url + 'srt',
'ext': 'srt',
}]
}
entry.update(base_info)
entries.append(entry)
return {
'_type': 'multi_video',
'id': content_id,
'title': title,
'description': description,
'entries': entries,
}
return info

View File

@@ -65,7 +65,7 @@ class NocoIE(InfoExtractor):
self._login()
def _login(self):
(username, password) = self._get_login_info()
username, password = self._get_login_info()
if username is None:
return

Some files were not shown because too many files have changed in this diff Show More